1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
4  * No bombay mix was harmed in the writing of this file.
5  *
6  * Copyright (C) 2020 Google LLC
7  * Author: Will Deacon <will@kernel.org>
8  */
9 
10 #include <linux/bitfield.h>
11 #include <asm/kvm_pgtable.h>
12 #include <asm/stage2_pgtable.h>
13 
14 
15 #define KVM_PTE_LEAF_ATTR_S2_PERMS	(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
16 					 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
17 					 KVM_PTE_LEAF_ATTR_HI_S2_XN)
18 
19 struct kvm_pgtable_walk_data {
20 	struct kvm_pgtable		*pgt;
21 	struct kvm_pgtable_walker	*walker;
22 
23 	u64				addr;
24 	u64				end;
25 };
26 
27 static bool kvm_phys_is_valid(u64 phys)
28 {
29 	return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
30 }
31 
32 static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
33 {
34 	u64 granule = kvm_granule_size(level);
35 
36 	if (!kvm_level_supports_block_mapping(level))
37 		return false;
38 
39 	if (granule > (end - addr))
40 		return false;
41 
42 	if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
43 		return false;
44 
45 	return IS_ALIGNED(addr, granule);
46 }
47 
48 static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
49 {
50 	u64 shift = kvm_granule_shift(level);
51 	u64 mask = BIT(PAGE_SHIFT - 3) - 1;
52 
53 	return (data->addr >> shift) & mask;
54 }
55 
56 static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
57 {
58 	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
59 	u64 mask = BIT(pgt->ia_bits) - 1;
60 
61 	return (addr & mask) >> shift;
62 }
63 
64 static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
65 {
66 	return __kvm_pgd_page_idx(data->pgt, data->addr);
67 }
68 
69 static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
70 {
71 	struct kvm_pgtable pgt = {
72 		.ia_bits	= ia_bits,
73 		.start_level	= start_level,
74 	};
75 
76 	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
77 }
78 
79 static void kvm_clear_pte(kvm_pte_t *ptep)
80 {
81 	WRITE_ONCE(*ptep, 0);
82 }
83 
84 static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp,
85 			      struct kvm_pgtable_mm_ops *mm_ops)
86 {
87 	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp));
88 
89 	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
90 	pte |= KVM_PTE_VALID;
91 
92 	WARN_ON(kvm_pte_valid(old));
93 	smp_store_release(ptep, pte);
94 }
95 
96 static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
97 {
98 	kvm_pte_t pte = kvm_phys_to_pte(pa);
99 	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
100 							   KVM_PTE_TYPE_BLOCK;
101 
102 	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
103 	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
104 	pte |= KVM_PTE_VALID;
105 
106 	return pte;
107 }
108 
109 static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
110 				  u32 level, kvm_pte_t *ptep,
111 				  enum kvm_pgtable_walk_flags flag)
112 {
113 	struct kvm_pgtable_walker *walker = data->walker;
114 	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
115 }
116 
117 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
118 			      kvm_pte_t *pgtable, u32 level);
119 
120 static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
121 				      kvm_pte_t *ptep, u32 level)
122 {
123 	int ret = 0;
124 	u64 addr = data->addr;
125 	kvm_pte_t *childp, pte = *ptep;
126 	bool table = kvm_pte_table(pte, level);
127 	enum kvm_pgtable_walk_flags flags = data->walker->flags;
128 
129 	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
130 		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
131 					     KVM_PGTABLE_WALK_TABLE_PRE);
132 	}
133 
134 	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
135 		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
136 					     KVM_PGTABLE_WALK_LEAF);
137 		pte = *ptep;
138 		table = kvm_pte_table(pte, level);
139 	}
140 
141 	if (ret)
142 		goto out;
143 
144 	if (!table) {
145 		data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
146 		data->addr += kvm_granule_size(level);
147 		goto out;
148 	}
149 
150 	childp = kvm_pte_follow(pte, data->pgt->mm_ops);
151 	ret = __kvm_pgtable_walk(data, childp, level + 1);
152 	if (ret)
153 		goto out;
154 
155 	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
156 		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
157 					     KVM_PGTABLE_WALK_TABLE_POST);
158 	}
159 
160 out:
161 	return ret;
162 }
163 
164 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
165 			      kvm_pte_t *pgtable, u32 level)
166 {
167 	u32 idx;
168 	int ret = 0;
169 
170 	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
171 		return -EINVAL;
172 
173 	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
174 		kvm_pte_t *ptep = &pgtable[idx];
175 
176 		if (data->addr >= data->end)
177 			break;
178 
179 		ret = __kvm_pgtable_visit(data, ptep, level);
180 		if (ret)
181 			break;
182 	}
183 
184 	return ret;
185 }
186 
187 static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
188 {
189 	u32 idx;
190 	int ret = 0;
191 	struct kvm_pgtable *pgt = data->pgt;
192 	u64 limit = BIT(pgt->ia_bits);
193 
194 	if (data->addr > limit || data->end > limit)
195 		return -ERANGE;
196 
197 	if (!pgt->pgd)
198 		return -EINVAL;
199 
200 	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
201 		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];
202 
203 		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
204 		if (ret)
205 			break;
206 	}
207 
208 	return ret;
209 }
210 
211 int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
212 		     struct kvm_pgtable_walker *walker)
213 {
214 	struct kvm_pgtable_walk_data walk_data = {
215 		.pgt	= pgt,
216 		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
217 		.end	= PAGE_ALIGN(walk_data.addr + size),
218 		.walker	= walker,
219 	};
220 
221 	return _kvm_pgtable_walk(&walk_data);
222 }
223 
224 struct leaf_walk_data {
225 	kvm_pte_t	pte;
226 	u32		level;
227 };
228 
229 static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
230 		       enum kvm_pgtable_walk_flags flag, void * const arg)
231 {
232 	struct leaf_walk_data *data = arg;
233 
234 	data->pte   = *ptep;
235 	data->level = level;
236 
237 	return 0;
238 }
239 
240 int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
241 			 kvm_pte_t *ptep, u32 *level)
242 {
243 	struct leaf_walk_data data;
244 	struct kvm_pgtable_walker walker = {
245 		.cb	= leaf_walker,
246 		.flags	= KVM_PGTABLE_WALK_LEAF,
247 		.arg	= &data,
248 	};
249 	int ret;
250 
251 	ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
252 			       PAGE_SIZE, &walker);
253 	if (!ret) {
254 		if (ptep)
255 			*ptep  = data.pte;
256 		if (level)
257 			*level = data.level;
258 	}
259 
260 	return ret;
261 }
262 
263 struct hyp_map_data {
264 	u64				phys;
265 	kvm_pte_t			attr;
266 	struct kvm_pgtable_mm_ops	*mm_ops;
267 };
268 
269 static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
270 {
271 	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
272 					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;
273 	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
274 	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
275 	bool nc = prot & KVM_PGTABLE_PROT_NC;
276 	kvm_pte_t attr;
277 	u32 mtype;
278 
279 	if (!(prot & KVM_PGTABLE_PROT_R) || (device && nc) ||
280 			(prot & (KVM_PGTABLE_PROT_PXN | KVM_PGTABLE_PROT_UXN)))
281 		return -EINVAL;
282 
283 	if (device)
284 		mtype = MT_DEVICE_nGnRnE;
285 	else if (nc)
286 		mtype = MT_NORMAL_NC;
287 	else
288 		mtype = MT_NORMAL;
289 
290 	attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
291 
292 	if (prot & KVM_PGTABLE_PROT_X) {
293 		if (prot & KVM_PGTABLE_PROT_W)
294 			return -EINVAL;
295 
296 		if (device)
297 			return -EINVAL;
298 	} else {
299 		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
300 	}
301 
302 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
303 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
304 	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
305 	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
306 	*ptep = attr;
307 
308 	return 0;
309 }
310 
311 enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
312 {
313 	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
314 	u32 ap;
315 
316 	if (!kvm_pte_valid(pte))
317 		return prot;
318 
319 	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
320 		prot |= KVM_PGTABLE_PROT_X;
321 
322 	ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
323 	if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)
324 		prot |= KVM_PGTABLE_PROT_R;
325 	else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW)
326 		prot |= KVM_PGTABLE_PROT_RW;
327 
328 	return prot;
329 }
330 
331 static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
332 				    kvm_pte_t *ptep, struct hyp_map_data *data)
333 {
334 	kvm_pte_t new, old = *ptep;
335 	u64 granule = kvm_granule_size(level), phys = data->phys;
336 
337 	if (!kvm_block_mapping_supported(addr, end, phys, level))
338 		return false;
339 
340 	data->phys += granule;
341 	new = kvm_init_valid_leaf_pte(phys, data->attr, level);
342 	if (old == new)
343 		return true;
344 	if (!kvm_pte_valid(old))
345 		data->mm_ops->get_page(ptep);
346 	else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
347 		return false;
348 
349 	smp_store_release(ptep, new);
350 	return true;
351 }
352 
353 static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
354 			  enum kvm_pgtable_walk_flags flag, void * const arg)
355 {
356 	kvm_pte_t *childp;
357 	struct hyp_map_data *data = arg;
358 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
359 
360 	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
361 		return 0;
362 
363 	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
364 		return -EINVAL;
365 
366 	childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
367 	if (!childp)
368 		return -ENOMEM;
369 
370 	kvm_set_table_pte(ptep, childp, mm_ops);
371 	mm_ops->get_page(ptep);
372 	return 0;
373 }
374 
375 int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
376 			enum kvm_pgtable_prot prot)
377 {
378 	int ret;
379 	struct hyp_map_data map_data = {
380 		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
381 		.mm_ops	= pgt->mm_ops,
382 	};
383 	struct kvm_pgtable_walker walker = {
384 		.cb	= hyp_map_walker,
385 		.flags	= KVM_PGTABLE_WALK_LEAF,
386 		.arg	= &map_data,
387 	};
388 
389 	ret = hyp_set_prot_attr(prot, &map_data.attr);
390 	if (ret)
391 		return ret;
392 
393 	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
394 	dsb(ishst);
395 	isb();
396 	return ret;
397 }
398 
399 struct hyp_unmap_data {
400 	u64				unmapped;
401 	struct kvm_pgtable_mm_ops	*mm_ops;
402 };
403 
404 static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
405 			    enum kvm_pgtable_walk_flags flag, void * const arg)
406 {
407 	kvm_pte_t pte = *ptep, *childp = NULL;
408 	u64 granule = kvm_granule_size(level);
409 	struct hyp_unmap_data *data = arg;
410 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
411 
412 	if (!kvm_pte_valid(pte))
413 		return -EINVAL;
414 
415 	if (kvm_pte_table(pte, level)) {
416 		childp = kvm_pte_follow(pte, mm_ops);
417 
418 		if (mm_ops->page_count(childp) != 1)
419 			return 0;
420 
421 		kvm_clear_pte(ptep);
422 		dsb(ishst);
423 		__tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
424 	} else {
425 		if (end - addr < granule)
426 			return -EINVAL;
427 
428 		kvm_clear_pte(ptep);
429 		dsb(ishst);
430 		__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
431 		data->unmapped += granule;
432 	}
433 
434 	dsb(ish);
435 	isb();
436 	mm_ops->put_page(ptep);
437 
438 	if (childp)
439 		mm_ops->put_page(childp);
440 
441 	return 0;
442 }
443 
444 u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
445 {
446 	struct hyp_unmap_data unmap_data = {
447 		.mm_ops	= pgt->mm_ops,
448 	};
449 	struct kvm_pgtable_walker walker = {
450 		.cb	= hyp_unmap_walker,
451 		.arg	= &unmap_data,
452 		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
453 	};
454 
455 	if (!pgt->mm_ops->page_count)
456 		return 0;
457 
458 	kvm_pgtable_walk(pgt, addr, size, &walker);
459 	return unmap_data.unmapped;
460 }
461 
462 int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
463 			 struct kvm_pgtable_mm_ops *mm_ops)
464 {
465 	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);
466 
467 	pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
468 	if (!pgt->pgd)
469 		return -ENOMEM;
470 
471 	pgt->ia_bits		= va_bits;
472 	pgt->start_level	= KVM_PGTABLE_MAX_LEVELS - levels;
473 	pgt->mm_ops		= mm_ops;
474 	pgt->mmu		= NULL;
475 	pgt->pte_ops		= NULL;
476 
477 	return 0;
478 }
479 
480 static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
481 			   enum kvm_pgtable_walk_flags flag, void * const arg)
482 {
483 	struct kvm_pgtable_mm_ops *mm_ops = arg;
484 	kvm_pte_t pte = *ptep;
485 
486 	if (!kvm_pte_valid(pte))
487 		return 0;
488 
489 	mm_ops->put_page(ptep);
490 
491 	if (kvm_pte_table(pte, level))
492 		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
493 
494 	return 0;
495 }
496 
497 void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
498 {
499 	struct kvm_pgtable_walker walker = {
500 		.cb	= hyp_free_walker,
501 		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
502 		.arg	= pgt->mm_ops,
503 	};
504 
505 	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
506 	pgt->mm_ops->put_page(pgt->pgd);
507 	pgt->pgd = NULL;
508 }
509 
510 struct stage2_map_data {
511 	u64				phys;
512 	kvm_pte_t			attr;
513 	u64				annotation;
514 
515 	kvm_pte_t			*anchor;
516 	kvm_pte_t			*childp;
517 
518 	struct kvm_s2_mmu		*mmu;
519 	void				*memcache;
520 
521 	struct kvm_pgtable_mm_ops	*mm_ops;
522 
523 	/* Force mappings to page granularity */
524 	bool				force_pte;
525 };
526 
527 u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
528 {
529 	u64 vtcr = VTCR_EL2_FLAGS;
530 	u8 lvls;
531 
532 	vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
533 	vtcr |= VTCR_EL2_T0SZ(phys_shift);
534 	/*
535 	 * Use a minimum 2 level page table to prevent splitting
536 	 * host PMD huge pages at stage2.
537 	 */
538 	lvls = stage2_pgtable_levels(phys_shift);
539 	if (lvls < 2)
540 		lvls = 2;
541 	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
542 
543 	/*
544 	 * Enable the Hardware Access Flag management, unconditionally
545 	 * on all CPUs. The feature is RES0 on CPUs without the support
546 	 * and must be ignored by the CPUs.
547 	 */
548 	vtcr |= VTCR_EL2_HA;
549 
550 	/* Set the vmid bits */
551 	vtcr |= (get_vmid_bits(mmfr1) == 16) ?
552 		VTCR_EL2_VS_16BIT :
553 		VTCR_EL2_VS_8BIT;
554 
555 	return vtcr;
556 }
557 
558 static bool stage2_has_fwb(struct kvm_pgtable *pgt)
559 {
560 	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
561 		return false;
562 
563 	return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
564 }
565 
566 #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
567 
568 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
569 		kvm_pte_t *ptep)
570 {
571 	u64 exec_type = KVM_PTE_LEAF_ATTR_HI_S2_XN_XN;
572 	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
573 	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
574 	bool nc = prot & KVM_PGTABLE_PROT_NC;
575 	enum kvm_pgtable_prot exec_prot;
576 	kvm_pte_t attr;
577 
578 	if (device)
579 		attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE);
580 	else if (nc)
581 		attr = KVM_S2_MEMATTR(pgt, NORMAL_NC);
582 	else
583 		attr = KVM_S2_MEMATTR(pgt, NORMAL);
584 
585 	exec_prot = prot & (KVM_PGTABLE_PROT_X | KVM_PGTABLE_PROT_PXN | KVM_PGTABLE_PROT_UXN);
586 	switch (exec_prot) {
587 	case KVM_PGTABLE_PROT_X:
588 		goto set_ap;
589 	case KVM_PGTABLE_PROT_PXN:
590 		exec_type = KVM_PTE_LEAF_ATTR_HI_S2_XN_PXN;
591 		break;
592 	case KVM_PGTABLE_PROT_UXN:
593 		exec_type = KVM_PTE_LEAF_ATTR_HI_S2_XN_UXN;
594 		break;
595 	default:
596 		if (exec_prot)
597 			return -EINVAL;
598 	}
599 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, exec_type);
600 
601 set_ap:
602 	if (prot & KVM_PGTABLE_PROT_R)
603 		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
604 
605 	if (prot & KVM_PGTABLE_PROT_W)
606 		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
607 
608 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
609 	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
610 	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
611 	*ptep = attr;
612 
613 	return 0;
614 }
615 
616 enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
617 {
618 	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
619 
620 	if (!kvm_pte_valid(pte))
621 		return prot;
622 
623 	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
624 		prot |= KVM_PGTABLE_PROT_R;
625 	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
626 		prot |= KVM_PGTABLE_PROT_W;
627 	switch (FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte)) {
628 	case 0:
629 		prot |= KVM_PGTABLE_PROT_X;
630 		break;
631 	case KVM_PTE_LEAF_ATTR_HI_S2_XN_PXN:
632 		prot |= KVM_PGTABLE_PROT_PXN;
633 		break;
634 	case KVM_PTE_LEAF_ATTR_HI_S2_XN_UXN:
635 		prot |= KVM_PGTABLE_PROT_UXN;
636 		break;
637 	case KVM_PTE_LEAF_ATTR_HI_S2_XN_XN:
638 		break;
639 	default:
640 		WARN_ON(1);
641 	}
642 
643 	return prot;
644 }
645 
646 static bool stage2_pte_needs_update(struct kvm_pgtable *pgt,
647 				    kvm_pte_t old, kvm_pte_t new)
648 {
649 	/* Following filter logic applies only to guest stage-2 entries. */
650 	if (pgt->flags & KVM_PGTABLE_S2_IDMAP)
651 		return true;
652 
653 	if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
654 		return true;
655 
656 	return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS));
657 }
658 
659 static void stage2_clear_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
660 			     u32 level)
661 {
662 	if (!kvm_pte_valid(*ptep))
663 		return;
664 
665 	kvm_clear_pte(ptep);
666 	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
667 }
668 
669 static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
670 			   u32 level, struct kvm_pgtable_mm_ops *mm_ops)
671 {
672 	/*
673 	 * Clear the existing PTE, and perform break-before-make with
674 	 * TLB maintenance if it was valid.
675 	 */
676 	stage2_clear_pte(ptep, mmu, addr, level);
677 	mm_ops->put_page(ptep);
678 }
679 
680 static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
681 {
682 	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
683 	return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL);
684 }
685 
686 static bool stage2_pte_executable(kvm_pte_t pte)
687 {
688 	kvm_pte_t xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte);
689 
690 	return kvm_pte_valid(pte) && xn != KVM_PTE_LEAF_ATTR_HI_S2_XN_XN;
691 }
692 
693 static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
694 					struct stage2_map_data *data)
695 {
696 	if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1)))
697 		return false;
698 
699 	return kvm_block_mapping_supported(addr, end, data->phys, level);
700 }
701 
702 static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
703 				      kvm_pte_t *ptep,
704 				      struct stage2_map_data *data)
705 {
706 	kvm_pte_t new, old = *ptep;
707 	u64 granule = kvm_granule_size(level), phys = data->phys;
708 	struct kvm_pgtable *pgt = data->mmu->pgt;
709 	struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
710 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
711 
712 	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
713 		return -E2BIG;
714 
715 	if (kvm_phys_is_valid(phys))
716 		new = kvm_init_valid_leaf_pte(phys, data->attr, level);
717 	else
718 		new = data->annotation;
719 
720 	/*
721 	 * Skip updating a guest PTE if we are trying to recreate the exact
722 	 * same mapping or change only the access permissions. Instead,
723 	 * the vCPU will exit one more time from the guest if still needed
724 	 * and then go through the path of relaxing permissions. This applies
725 	 * only to guest PTEs; Host PTEs are unconditionally updated. The
726 	 * host cannot livelock because the abort handler has done prior
727 	 * checks before calling here.
728 	 */
729 	if (!stage2_pte_needs_update(pgt, old, new))
730 		return -EAGAIN;
731 
732 	if (pte_ops->pte_is_counted_cb(old, level))
733 		mm_ops->put_page(ptep);
734 
735 	/*
736 	 * If we're only changing software bits, then we don't need to
737 	 * do anything else.
738 	 */
739 	if (!((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
740 		goto out_set_pte;
741 
742 	stage2_clear_pte(ptep, data->mmu, addr, level);
743 
744 	/* Perform CMOs before installation of the guest stage-2 PTE */
745 	if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
746 		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
747 					       granule);
748 	if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
749 		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
750 
751 out_set_pte:
752 	if (pte_ops->pte_is_counted_cb(new, level))
753 		mm_ops->get_page(ptep);
754 
755 	smp_store_release(ptep, new);
756 	if (kvm_phys_is_valid(phys))
757 		data->phys += granule;
758 	return 0;
759 }
760 
761 static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
762 				     kvm_pte_t *ptep,
763 				     struct stage2_map_data *data)
764 {
765 	if (data->anchor)
766 		return 0;
767 
768 	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
769 		return 0;
770 
771 	data->childp = kvm_pte_follow(*ptep, data->mm_ops);
772 	kvm_clear_pte(ptep);
773 
774 	/*
775 	 * Invalidate the whole stage-2, as we may have numerous leaf
776 	 * entries below us which would otherwise need invalidating
777 	 * individually.
778 	 */
779 	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
780 	data->anchor = ptep;
781 	return 0;
782 }
783 
784 static void stage2_map_prefault_idmap(struct kvm_pgtable_pte_ops *pte_ops,
785 				      u64 addr, u64 end, u32 level,
786 				      kvm_pte_t *ptep, kvm_pte_t block_pte)
787 {
788 	u64 pa, granule;
789 	int i;
790 
791 	WARN_ON(pte_ops->pte_is_counted_cb(block_pte, level-1));
792 
793 	if (!kvm_pte_valid(block_pte))
794 		return;
795 
796 	pa = ALIGN_DOWN(addr, kvm_granule_size(level-1));
797 	granule = kvm_granule_size(level);
798 	for (i = 0; i < PTRS_PER_PTE; ++i, ++ptep, pa += granule) {
799 		kvm_pte_t pte = kvm_init_valid_leaf_pte(pa, block_pte, level);
800 		/* Skip ptes in the range being modified by the caller. */
801 		if ((pa < addr) || (pa >= end)) {
802 			/* We can write non-atomically: ptep isn't yet live. */
803 			*ptep = pte;
804 		}
805 	}
806 }
807 
808 static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
809 				struct stage2_map_data *data)
810 {
811 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
812 	struct kvm_pgtable *pgt = data->mmu->pgt;
813 	struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
814 	kvm_pte_t *childp, pte = *ptep;
815 	int ret;
816 
817 	if (data->anchor) {
818 		if (pte_ops->pte_is_counted_cb(pte, level))
819 			mm_ops->put_page(ptep);
820 
821 		return 0;
822 	}
823 
824 	ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data);
825 	if (ret != -E2BIG)
826 		return ret;
827 
828 	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
829 		return -EINVAL;
830 
831 	if (!data->memcache)
832 		return -ENOMEM;
833 
834 	childp = mm_ops->zalloc_page(data->memcache);
835 	if (!childp)
836 		return -ENOMEM;
837 
838 	if (pgt->flags & KVM_PGTABLE_S2_IDMAP) {
839 		stage2_map_prefault_idmap(pte_ops, addr, end, level + 1,
840 					  childp, pte);
841 	}
842 
843 	/*
844 	 * If we've run into an existing block mapping then replace it with
845 	 * a table. Accesses beyond 'end' that fall within the new table
846 	 * will be mapped lazily.
847 	 */
848 	if (pte_ops->pte_is_counted_cb(pte, level)) {
849 		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
850 	} else {
851 		/*
852 		 * On non-refcounted PTEs we just clear them out without
853 		 * dropping the refcount.
854 		 */
855 		stage2_clear_pte(ptep, data->mmu, addr, level);
856 	}
857 
858 	kvm_set_table_pte(ptep, childp, mm_ops);
859 	mm_ops->get_page(ptep);
860 
861 	return 0;
862 }
863 
864 static void stage2_coalesce_walk_table_post(u64 addr, u64 end, u32 level,
865 					    kvm_pte_t *ptep,
866 					    struct stage2_map_data *data)
867 {
868 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
869 	kvm_pte_t *childp = kvm_pte_follow(*ptep, mm_ops);
870 
871 	/*
872 	 * Decrement the refcount only on the set ownership path to avoid a
873 	 * loop situation when the following happens:
874 	 *  1. We take a host stage2 fault and we create a small mapping which
875 	 *  has default attributes (is not refcounted).
876 	 *  2. On the way back we execute the post handler and we zap the
877 	 *  table that holds our mapping.
878 	 */
879 	if (kvm_phys_is_valid(data->phys) ||
880 	    !kvm_level_supports_block_mapping(level))
881 		return;
882 
883 	/*
884 	 * Free a page that is not referenced anymore and drop the reference
885 	 * of the page table page.
886 	 */
887 	if (mm_ops->page_count(childp) == 1) {
888 		kvm_clear_pte(ptep);
889 		kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
890 		mm_ops->put_page(ptep);
891 		mm_ops->put_page(childp);
892 	}
893 }
894 
895 static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
896 				      kvm_pte_t *ptep,
897 				      struct stage2_map_data *data)
898 {
899 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
900 	kvm_pte_t *childp;
901 	int ret = 0;
902 
903 	if (!data->anchor) {
904 		stage2_coalesce_walk_table_post(addr, end, level, ptep,
905 						data);
906 		return 0;
907 	}
908 
909 	if (data->anchor == ptep) {
910 		childp = data->childp;
911 		data->anchor = NULL;
912 		data->childp = NULL;
913 		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
914 	} else {
915 		childp = kvm_pte_follow(*ptep, mm_ops);
916 	}
917 
918 	mm_ops->put_page(childp);
919 	mm_ops->put_page(ptep);
920 
921 	return ret;
922 }
923 
924 /*
925  * This is a little fiddly, as we use all three of the walk flags. The idea
926  * is that the TABLE_PRE callback runs for table entries on the way down,
927  * looking for table entries which we could conceivably replace with a
928  * block entry for this mapping. If it finds one, then it sets the 'anchor'
929  * field in 'struct stage2_map_data' to point at the table entry, before
930  * clearing the entry to zero and descending into the now detached table.
931  *
932  * The behaviour of the LEAF callback then depends on whether or not the
933  * anchor has been set. If not, then we're not using a block mapping higher
934  * up the table and we perform the mapping at the existing leaves instead.
935  * If, on the other hand, the anchor _is_ set, then we drop references to
936  * all valid leaves so that the pages beneath the anchor can be freed.
937  *
938  * Finally, the TABLE_POST callback does nothing if the anchor has not
939  * been set, but otherwise frees the page-table pages while walking back up
940  * the page-table, installing the block entry when it revisits the anchor
941  * pointer and clearing the anchor to NULL.
942  */
943 static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
944 			     enum kvm_pgtable_walk_flags flag, void * const arg)
945 {
946 	struct stage2_map_data *data = arg;
947 
948 	switch (flag) {
949 	case KVM_PGTABLE_WALK_TABLE_PRE:
950 		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
951 	case KVM_PGTABLE_WALK_LEAF:
952 		return stage2_map_walk_leaf(addr, end, level, ptep, data);
953 	case KVM_PGTABLE_WALK_TABLE_POST:
954 		return stage2_map_walk_table_post(addr, end, level, ptep, data);
955 	}
956 
957 	return -EINVAL;
958 }
959 
960 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
961 			   u64 phys, enum kvm_pgtable_prot prot,
962 			   void *mc)
963 {
964 	int ret;
965 	struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
966 	struct stage2_map_data map_data = {
967 		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
968 		.mmu		= pgt->mmu,
969 		.memcache	= mc,
970 		.mm_ops		= pgt->mm_ops,
971 	};
972 	struct kvm_pgtable_walker walker = {
973 		.cb		= stage2_map_walker,
974 		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
975 				  KVM_PGTABLE_WALK_LEAF |
976 				  KVM_PGTABLE_WALK_TABLE_POST,
977 		.arg		= &map_data,
978 	};
979 
980 	if (pte_ops->force_pte_cb)
981 		map_data.force_pte = pte_ops->force_pte_cb(addr, addr + size, prot);
982 
983 	if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys)))
984 		return -EINVAL;
985 
986 	ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
987 	if (ret)
988 		return ret;
989 
990 	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
991 	dsb(ishst);
992 	return ret;
993 }
994 
995 int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size,
996 				void *mc, kvm_pte_t annotation)
997 {
998 	int ret;
999 	struct stage2_map_data map_data = {
1000 		.phys		= KVM_PHYS_INVALID,
1001 		.mmu		= pgt->mmu,
1002 		.memcache	= mc,
1003 		.mm_ops		= pgt->mm_ops,
1004 		.force_pte	= true,
1005 		.annotation	= annotation,
1006 	};
1007 	struct kvm_pgtable_walker walker = {
1008 		.cb		= stage2_map_walker,
1009 		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
1010 				  KVM_PGTABLE_WALK_LEAF |
1011 				  KVM_PGTABLE_WALK_TABLE_POST,
1012 		.arg		= &map_data,
1013 	};
1014 
1015 	if (annotation & PTE_VALID)
1016 		return -EINVAL;
1017 
1018 	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
1019 	return ret;
1020 }
1021 
1022 static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
1023 			       enum kvm_pgtable_walk_flags flag,
1024 			       void * const arg)
1025 {
1026 	struct kvm_pgtable *pgt = arg;
1027 	struct kvm_s2_mmu *mmu = pgt->mmu;
1028 	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
1029 	struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
1030 	kvm_pte_t pte = *ptep, *childp = NULL;
1031 	bool need_flush = false;
1032 
1033 	if (!kvm_pte_valid(pte)) {
1034 		if (pte_ops->pte_is_counted_cb(pte, level)) {
1035 			kvm_clear_pte(ptep);
1036 			mm_ops->put_page(ptep);
1037 		}
1038 		return 0;
1039 	}
1040 
1041 	if (kvm_pte_table(pte, level)) {
1042 		childp = kvm_pte_follow(pte, mm_ops);
1043 
1044 		if (mm_ops->page_count(childp) != 1)
1045 			return 0;
1046 	} else if (stage2_pte_cacheable(pgt, pte)) {
1047 		need_flush = !stage2_has_fwb(pgt);
1048 	}
1049 
1050 	/*
1051 	 * This is similar to the map() path in that we unmap the entire
1052 	 * block entry and rely on the remaining portions being faulted
1053 	 * back lazily.
1054 	 */
1055 	if (pte_ops->pte_is_counted_cb(pte, level))
1056 		stage2_put_pte(ptep, mmu, addr, level, mm_ops);
1057 	else
1058 		stage2_clear_pte(ptep, mmu, addr, level);
1059 
1060 	if (need_flush && mm_ops->dcache_clean_inval_poc)
1061 		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
1062 					       kvm_granule_size(level));
1063 
1064 	if (childp)
1065 		mm_ops->put_page(childp);
1066 
1067 	return 0;
1068 }
1069 
1070 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
1071 {
1072 	struct kvm_pgtable_walker walker = {
1073 		.cb	= stage2_unmap_walker,
1074 		.arg	= pgt,
1075 		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
1076 	};
1077 
1078 	return kvm_pgtable_walk(pgt, addr, size, &walker);
1079 }
1080 
1081 static int stage2_reclaim_leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
1082 				      enum kvm_pgtable_walk_flags flag, void * const arg)
1083 {
1084 	stage2_coalesce_walk_table_post(addr, end, level, ptep, arg);
1085 
1086 	return 0;
1087 }
1088 
1089 int kvm_pgtable_stage2_reclaim_leaves(struct kvm_pgtable *pgt, u64 addr, u64 size)
1090 {
1091 	struct stage2_map_data map_data = {
1092 		.phys		= KVM_PHYS_INVALID,
1093 		.mmu		= pgt->mmu,
1094 		.mm_ops		= pgt->mm_ops,
1095 	};
1096 	struct kvm_pgtable_walker walker = {
1097 		.cb	= stage2_reclaim_leaf_walker,
1098 		.arg	= &map_data,
1099 		.flags	= KVM_PGTABLE_WALK_TABLE_POST,
1100 	};
1101 
1102 	return kvm_pgtable_walk(pgt, addr, size, &walker);
1103 }
1104 
1105 struct stage2_attr_data {
1106 	kvm_pte_t			attr_set;
1107 	kvm_pte_t			attr_clr;
1108 	kvm_pte_t			pte;
1109 	u32				level;
1110 	struct kvm_pgtable_mm_ops	*mm_ops;
1111 };
1112 
1113 static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
1114 			      enum kvm_pgtable_walk_flags flag,
1115 			      void * const arg)
1116 {
1117 	kvm_pte_t pte = *ptep;
1118 	struct stage2_attr_data *data = arg;
1119 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
1120 
1121 	if (!kvm_pte_valid(pte))
1122 		return 0;
1123 
1124 	data->level = level;
1125 	data->pte = pte;
1126 	pte &= ~data->attr_clr;
1127 	pte |= data->attr_set;
1128 
1129 	/*
1130 	 * We may race with the CPU trying to set the access flag here,
1131 	 * but worst-case the access flag update gets lost and will be
1132 	 * set on the next access instead.
1133 	 */
1134 	if (data->pte != pte) {
1135 		/*
1136 		 * Invalidate instruction cache before updating the guest
1137 		 * stage-2 PTE if we are going to add executable permission.
1138 		 */
1139 		if (mm_ops->icache_inval_pou &&
1140 		    stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
1141 			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
1142 						  kvm_granule_size(level));
1143 		WRITE_ONCE(*ptep, pte);
1144 	}
1145 
1146 	return 0;
1147 }
1148 
1149 static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
1150 				    u64 size, kvm_pte_t attr_set,
1151 				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
1152 				    u32 *level)
1153 {
1154 	int ret;
1155 	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
1156 	struct stage2_attr_data data = {
1157 		.attr_set	= attr_set & attr_mask,
1158 		.attr_clr	= attr_clr & attr_mask,
1159 		.mm_ops		= pgt->mm_ops,
1160 	};
1161 	struct kvm_pgtable_walker walker = {
1162 		.cb		= stage2_attr_walker,
1163 		.arg		= &data,
1164 		.flags		= KVM_PGTABLE_WALK_LEAF,
1165 	};
1166 
1167 	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
1168 	if (ret)
1169 		return ret;
1170 
1171 	if (orig_pte)
1172 		*orig_pte = data.pte;
1173 
1174 	if (level)
1175 		*level = data.level;
1176 	return 0;
1177 }
1178 
1179 int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
1180 {
1181 	return stage2_update_leaf_attrs(pgt, addr, size, 0,
1182 					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
1183 					NULL, NULL);
1184 }
1185 
1186 kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
1187 {
1188 	kvm_pte_t pte = 0;
1189 	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
1190 				 &pte, NULL);
1191 	dsb(ishst);
1192 	return pte;
1193 }
1194 
1195 kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
1196 {
1197 	kvm_pte_t pte = 0;
1198 	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
1199 				 &pte, NULL);
1200 	/*
1201 	 * "But where's the TLBI?!", you scream.
1202 	 * "Over in the core code", I sigh.
1203 	 *
1204 	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
1205 	 */
1206 	return pte;
1207 }
1208 
1209 bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
1210 {
1211 	kvm_pte_t pte = 0;
1212 	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
1213 	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
1214 }
1215 
1216 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
1217 				   enum kvm_pgtable_prot prot)
1218 {
1219 	int ret;
1220 	u32 level;
1221 	kvm_pte_t set = 0, clr = 0;
1222 
1223 	if (prot & ~KVM_PGTABLE_PROT_RWX)
1224 		return -EINVAL;
1225 
1226 	if (prot & KVM_PGTABLE_PROT_R)
1227 		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
1228 
1229 	if (prot & KVM_PGTABLE_PROT_W)
1230 		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
1231 
1232 	if (prot & KVM_PGTABLE_PROT_X)
1233 		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
1234 
1235 	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
1236 	if (!ret)
1237 		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
1238 	return ret;
1239 }
1240 
1241 static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
1242 			       enum kvm_pgtable_walk_flags flag,
1243 			       void * const arg)
1244 {
1245 	struct kvm_pgtable *pgt = arg;
1246 	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
1247 	kvm_pte_t pte = *ptep;
1248 
1249 	if (!stage2_pte_cacheable(pgt, pte))
1250 		return 0;
1251 
1252 	if (mm_ops->dcache_clean_inval_poc)
1253 		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
1254 					       kvm_granule_size(level));
1255 	return 0;
1256 }
1257 
1258 int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
1259 {
1260 	struct kvm_pgtable_walker walker = {
1261 		.cb	= stage2_flush_walker,
1262 		.flags	= KVM_PGTABLE_WALK_LEAF,
1263 		.arg	= pgt,
1264 	};
1265 
1266 	if (stage2_has_fwb(pgt))
1267 		return 0;
1268 
1269 	return kvm_pgtable_walk(pgt, addr, size, &walker);
1270 }
1271 
1272 
1273 int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
1274 			      struct kvm_pgtable_mm_ops *mm_ops,
1275 			      enum kvm_pgtable_stage2_flags flags,
1276 			      struct kvm_pgtable_pte_ops *pte_ops)
1277 {
1278 	size_t pgd_sz;
1279 	u64 vtcr = mmu->arch->vtcr;
1280 	u32 ia_bits = VTCR_EL2_IPA(vtcr);
1281 	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
1282 	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
1283 
1284 	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
1285 	pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz);
1286 	if (!pgt->pgd)
1287 		return -ENOMEM;
1288 
1289 	pgt->ia_bits		= ia_bits;
1290 	pgt->start_level	= start_level;
1291 	pgt->mm_ops		= mm_ops;
1292 	pgt->mmu		= mmu;
1293 	pgt->flags		= flags;
1294 	pgt->pte_ops		= pte_ops;
1295 
1296 	/* Ensure zeroed PGD pages are visible to the hardware walker */
1297 	dsb(ishst);
1298 	return 0;
1299 }
1300 
1301 size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
1302 {
1303 	u32 ia_bits = VTCR_EL2_IPA(vtcr);
1304 	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
1305 	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
1306 
1307 	return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
1308 }
1309 
1310 static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
1311 			      enum kvm_pgtable_walk_flags flag,
1312 			      void * const arg)
1313 {
1314 	struct kvm_pgtable *pgt = arg;
1315 	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
1316 	struct kvm_pgtable_pte_ops *pte_ops = pgt->pte_ops;
1317 	kvm_pte_t pte = *ptep;
1318 
1319 	if (!pte_ops->pte_is_counted_cb(pte, level))
1320 		return 0;
1321 
1322 	mm_ops->put_page(ptep);
1323 
1324 	if (kvm_pte_table(pte, level))
1325 		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
1326 
1327 	return 0;
1328 }
1329 
1330 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
1331 {
1332 	size_t pgd_sz;
1333 	struct kvm_pgtable_walker walker = {
1334 		.cb	= stage2_free_walker,
1335 		.flags	= KVM_PGTABLE_WALK_LEAF |
1336 			  KVM_PGTABLE_WALK_TABLE_POST,
1337 		.arg	= pgt,
1338 	};
1339 
1340 	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
1341 	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
1342 	pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
1343 	pgt->pgd = NULL;
1344 }
1345