1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
4  * No bombay mix was harmed in the writing of this file.
5  *
6  * Copyright (C) 2020 Google LLC
7  * Author: Will Deacon <will@kernel.org>
8  */
9 
10 #include <linux/bitfield.h>
11 #include <asm/kvm_pgtable.h>
12 #include <asm/stage2_pgtable.h>
13 
14 
15 #define KVM_PTE_TYPE			BIT(1)
16 #define KVM_PTE_TYPE_BLOCK		0
17 #define KVM_PTE_TYPE_PAGE		1
18 #define KVM_PTE_TYPE_TABLE		1
19 
20 #define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)
21 
22 #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
23 #define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
24 #define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	3
25 #define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	1
26 #define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
27 #define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
28 #define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)
29 
30 #define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
31 #define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
32 #define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
33 #define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
34 #define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
35 #define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)
36 
37 #define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)
38 
39 #define KVM_PTE_LEAF_ATTR_HI_SW		GENMASK(58, 55)
40 
41 #define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)
42 
43 #define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)
44 
45 #define KVM_PTE_LEAF_ATTR_S2_PERMS	(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
46 					 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
47 					 KVM_PTE_LEAF_ATTR_HI_S2_XN)
48 
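/*
 * Illustrative note (an editor-added sketch, not derived from a particular
 * caller): the masks above are combined with BIT()/FIELD_PREP() to build the
 * 'attr' value that kvm_init_valid_leaf_pte() later merges into a leaf PTE.
 * A hypothetical stage-2 leaf attribute for readable, writable,
 * non-executable, inner-shareable, accessed memory would be composed roughly
 * as:
 *
 *	attr = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R |
 *	       KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W |
 *	       FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH,
 *			  KVM_PTE_LEAF_ATTR_LO_S2_SH_IS) |
 *	       KVM_PTE_LEAF_ATTR_LO_S2_AF |
 *	       KVM_PTE_LEAF_ATTR_HI_S2_XN;
 *
 * i.e. bits 6, 7, 10 and 54 plus 0b11 in the SH field (bits 9:8). The
 * MEMATTR field is omitted here; stage2_set_prot_attr() below is the
 * authoritative version of this logic.
 */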
49 struct kvm_pgtable_walk_data {
50 	struct kvm_pgtable		*pgt;
51 	struct kvm_pgtable_walker	*walker;
52 
53 	u64				addr;
54 	u64				end;
55 };
56 
57 #define KVM_PHYS_INVALID (-1ULL)
58 
59 static bool kvm_phys_is_valid(u64 phys)
60 {
61 	return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
62 }
63 
64 static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
65 {
66 	u64 granule = kvm_granule_size(level);
67 
68 	if (!kvm_level_supports_block_mapping(level))
69 		return false;
70 
71 	if (granule > (end - addr))
72 		return false;
73 
74 	if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
75 		return false;
76 
77 	return IS_ALIGNED(addr, granule);
78 }
79 
80 static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
81 {
82 	u64 shift = kvm_granule_shift(level);
83 	u64 mask = BIT(PAGE_SHIFT - 3) - 1;
84 
85 	return (data->addr >> shift) & mask;
86 }
87 
88 static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
89 {
90 	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
91 	u64 mask = BIT(pgt->ia_bits) - 1;
92 
93 	return (addr & mask) >> shift;
94 }
95 
96 static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
97 {
98 	return __kvm_pgd_page_idx(data->pgt, data->addr);
99 }
100 
101 static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
102 {
103 	struct kvm_pgtable pgt = {
104 		.ia_bits	= ia_bits,
105 		.start_level	= start_level,
106 	};
107 
108 	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
109 }
110 
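/*
 * Worked example (editor-added, assuming a 4KiB granule): with ia_bits = 40
 * and start_level = 1, __kvm_pgd_page_idx() indexes the PGD using the shift
 * of level 0 (39 bits), so the highest index for address BIT(40) - 1 is 1
 * and kvm_pgd_pages() returns 2. This mirrors the architectural stage-2
 * concatenation of two level-1 tables instead of adding a level-0 table.
 */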
111 static bool kvm_pte_table(kvm_pte_t pte, u32 level)
112 {
113 	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
114 		return false;
115 
116 	if (!kvm_pte_valid(pte))
117 		return false;
118 
119 	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
120 }
121 
122 static kvm_pte_t kvm_phys_to_pte(u64 pa)
123 {
124 	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
125 
126 	if (PAGE_SHIFT == 16)
127 		pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
128 
129 	return pte;
130 }
131 
132 static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops)
133 {
134 	return mm_ops->phys_to_virt(kvm_pte_to_phys(pte));
135 }
136 
137 static void kvm_clear_pte(kvm_pte_t *ptep)
138 {
139 	WRITE_ONCE(*ptep, 0);
140 }
141 
142 static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp,
143 			      struct kvm_pgtable_mm_ops *mm_ops)
144 {
145 	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp));
146 
147 	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
148 	pte |= KVM_PTE_VALID;
149 
150 	WARN_ON(kvm_pte_valid(old));
151 	smp_store_release(ptep, pte);
152 }
153 
154 static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
155 {
156 	kvm_pte_t pte = kvm_phys_to_pte(pa);
157 	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
158 							   KVM_PTE_TYPE_BLOCK;
159 
160 	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
161 	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
162 	pte |= KVM_PTE_VALID;
163 
164 	return pte;
165 }
166 
167 static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
168 				  u32 level, kvm_pte_t *ptep,
169 				  enum kvm_pgtable_walk_flags flag)
170 {
171 	struct kvm_pgtable_walker *walker = data->walker;
172 	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
173 }
174 
175 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
176 			      kvm_pte_t *pgtable, u32 level);
177 
178 static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
179 				      kvm_pte_t *ptep, u32 level)
180 {
181 	int ret = 0;
182 	u64 addr = data->addr;
183 	kvm_pte_t *childp, pte = *ptep;
184 	bool table = kvm_pte_table(pte, level);
185 	enum kvm_pgtable_walk_flags flags = data->walker->flags;
186 
187 	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
188 		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
189 					     KVM_PGTABLE_WALK_TABLE_PRE);
190 	}
191 
192 	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
193 		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
194 					     KVM_PGTABLE_WALK_LEAF);
195 		pte = *ptep;
196 		table = kvm_pte_table(pte, level);
197 	}
198 
199 	if (ret)
200 		goto out;
201 
202 	if (!table) {
203 		data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
204 		data->addr += kvm_granule_size(level);
205 		goto out;
206 	}
207 
208 	childp = kvm_pte_follow(pte, data->pgt->mm_ops);
209 	ret = __kvm_pgtable_walk(data, childp, level + 1);
210 	if (ret)
211 		goto out;
212 
213 	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
214 		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
215 					     KVM_PGTABLE_WALK_TABLE_POST);
216 	}
217 
218 out:
219 	return ret;
220 }
221 
222 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
223 			      kvm_pte_t *pgtable, u32 level)
224 {
225 	u32 idx;
226 	int ret = 0;
227 
228 	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
229 		return -EINVAL;
230 
231 	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
232 		kvm_pte_t *ptep = &pgtable[idx];
233 
234 		if (data->addr >= data->end)
235 			break;
236 
237 		ret = __kvm_pgtable_visit(data, ptep, level);
238 		if (ret)
239 			break;
240 	}
241 
242 	return ret;
243 }
244 
245 static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
246 {
247 	u32 idx;
248 	int ret = 0;
249 	struct kvm_pgtable *pgt = data->pgt;
250 	u64 limit = BIT(pgt->ia_bits);
251 
252 	if (data->addr > limit || data->end > limit)
253 		return -ERANGE;
254 
255 	if (!pgt->pgd)
256 		return -EINVAL;
257 
258 	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
259 		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];
260 
261 		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
262 		if (ret)
263 			break;
264 	}
265 
266 	return ret;
267 }
268 
269 int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
270 		     struct kvm_pgtable_walker *walker)
271 {
272 	struct kvm_pgtable_walk_data walk_data = {
273 		.pgt	= pgt,
274 		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
275 		.end	= PAGE_ALIGN(walk_data.addr + size),
276 		.walker	= walker,
277 	};
278 
279 	return _kvm_pgtable_walk(&walk_data);
280 }
281 
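/*
 * Minimal usage sketch of the walker API (editor-added; the walker and the
 * counter are made-up names for illustration). A caller supplies a callback
 * plus the visit flags it cares about and lets kvm_pgtable_walk() drive it
 * over the requested range:
 *
 *	static int count_valid_walker(u64 addr, u64 end, u32 level,
 *				      kvm_pte_t *ptep,
 *				      enum kvm_pgtable_walk_flags flag,
 *				      void * const arg)
 *	{
 *		if (kvm_pte_valid(*ptep))
 *			++*(u64 *)arg;
 *		return 0;
 *	}
 *
 *	u64 nr_valid = 0;
 *	struct kvm_pgtable_walker walker = {
 *		.cb	= count_valid_walker,
 *		.flags	= KVM_PGTABLE_WALK_LEAF,
 *		.arg	= &nr_valid,
 *	};
 *	int ret = kvm_pgtable_walk(pgt, addr, size, &walker);
 *
 * The leaf_walker()/kvm_pgtable_get_leaf() pair below follows exactly this
 * pattern.
 */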
282 struct leaf_walk_data {
283 	kvm_pte_t	pte;
284 	u32		level;
285 };
286 
287 static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
288 		       enum kvm_pgtable_walk_flags flag, void * const arg)
289 {
290 	struct leaf_walk_data *data = arg;
291 
292 	data->pte   = *ptep;
293 	data->level = level;
294 
295 	return 0;
296 }
297 
298 int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
299 			 kvm_pte_t *ptep, u32 *level)
300 {
301 	struct leaf_walk_data data;
302 	struct kvm_pgtable_walker walker = {
303 		.cb	= leaf_walker,
304 		.flags	= KVM_PGTABLE_WALK_LEAF,
305 		.arg	= &data,
306 	};
307 	int ret;
308 
309 	ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE),
310 			       PAGE_SIZE, &walker);
311 	if (!ret) {
312 		if (ptep)
313 			*ptep  = data.pte;
314 		if (level)
315 			*level = data.level;
316 	}
317 
318 	return ret;
319 }
320 
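/*
 * Example call (editor-added sketch): to inspect whatever currently backs an
 * address, a caller can do
 *
 *	kvm_pte_t pte;
 *	u32 level;
 *
 *	if (!kvm_pgtable_get_leaf(pgt, addr, &pte, &level) &&
 *	    kvm_pte_valid(pte))
 *		handle_mapped_leaf(pte, level);
 *
 * where handle_mapped_leaf() is a placeholder for the caller's logic. Either
 * output pointer may be NULL if only the other value is needed.
 */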
321 struct hyp_map_data {
322 	u64				phys;
323 	kvm_pte_t			attr;
324 	struct kvm_pgtable_mm_ops	*mm_ops;
325 };
326 
327 static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
328 {
329 	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
330 	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
331 	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
332 	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
333 	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
334 					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;
335 
336 	if (!(prot & KVM_PGTABLE_PROT_R))
337 		return -EINVAL;
338 
339 	if (prot & KVM_PGTABLE_PROT_X) {
340 		if (prot & KVM_PGTABLE_PROT_W)
341 			return -EINVAL;
342 
343 		if (device)
344 			return -EINVAL;
345 	} else {
346 		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
347 	}
348 
349 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
350 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
351 	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
352 	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
353 	*ptep = attr;
354 
355 	return 0;
356 }
357 
358 enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
359 {
360 	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
361 	u32 ap;
362 
363 	if (!kvm_pte_valid(pte))
364 		return prot;
365 
366 	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN))
367 		prot |= KVM_PGTABLE_PROT_X;
368 
369 	ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte);
370 	if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO)
371 		prot |= KVM_PGTABLE_PROT_R;
372 	else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW)
373 		prot |= KVM_PGTABLE_PROT_RW;
374 
375 	return prot;
376 }
377 
378 static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
379 				    kvm_pte_t *ptep, struct hyp_map_data *data)
380 {
381 	kvm_pte_t new, old = *ptep;
382 	u64 granule = kvm_granule_size(level), phys = data->phys;
383 
384 	if (!kvm_block_mapping_supported(addr, end, phys, level))
385 		return false;
386 
387 	data->phys += granule;
388 	new = kvm_init_valid_leaf_pte(phys, data->attr, level);
389 	if (old == new)
390 		return true;
391 	if (!kvm_pte_valid(old))
392 		data->mm_ops->get_page(ptep);
393 	else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
394 		return false;
395 
396 	smp_store_release(ptep, new);
397 	return true;
398 }
399 
400 static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
401 			  enum kvm_pgtable_walk_flags flag, void * const arg)
402 {
403 	kvm_pte_t *childp;
404 	struct hyp_map_data *data = arg;
405 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
406 
407 	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
408 		return 0;
409 
410 	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
411 		return -EINVAL;
412 
413 	childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
414 	if (!childp)
415 		return -ENOMEM;
416 
417 	kvm_set_table_pte(ptep, childp, mm_ops);
418 	mm_ops->get_page(ptep);
419 	return 0;
420 }
421 
422 int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
423 			enum kvm_pgtable_prot prot)
424 {
425 	int ret;
426 	struct hyp_map_data map_data = {
427 		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
428 		.mm_ops	= pgt->mm_ops,
429 	};
430 	struct kvm_pgtable_walker walker = {
431 		.cb	= hyp_map_walker,
432 		.flags	= KVM_PGTABLE_WALK_LEAF,
433 		.arg	= &map_data,
434 	};
435 
436 	ret = hyp_set_prot_attr(prot, &map_data.attr);
437 	if (ret)
438 		return ret;
439 
440 	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
441 	dsb(ishst);
442 	isb();
443 	return ret;
444 }
445 
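/*
 * Illustrative usage (editor-added; not lifted from an in-tree caller):
 * after kvm_pgtable_hyp_init(), a read-write, non-executable hyp mapping of
 * a physically contiguous region could be created with
 *
 *	ret = kvm_pgtable_hyp_map(&pgt, hyp_va, size, phys,
 *				  KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W);
 *
 * hyp_set_prot_attr() rejects write+exec combinations and executable device
 * memory, so the prot value must respect those constraints.
 */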
446 struct hyp_unmap_data {
447 	u64				unmapped;
448 	struct kvm_pgtable_mm_ops	*mm_ops;
449 };
450 
451 static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
452 			    enum kvm_pgtable_walk_flags flag, void * const arg)
453 {
454 	kvm_pte_t pte = *ptep, *childp = NULL;
455 	u64 granule = kvm_granule_size(level);
456 	struct hyp_unmap_data *data = arg;
457 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
458 
459 	if (!kvm_pte_valid(pte))
460 		return -EINVAL;
461 
462 	if (kvm_pte_table(pte, level)) {
463 		childp = kvm_pte_follow(pte, mm_ops);
464 
465 		if (mm_ops->page_count(childp) != 1)
466 			return 0;
467 
468 		kvm_clear_pte(ptep);
469 		dsb(ishst);
470 		__tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
471 	} else {
472 		if (end - addr < granule)
473 			return -EINVAL;
474 
475 		kvm_clear_pte(ptep);
476 		dsb(ishst);
477 		__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
478 		data->unmapped += granule;
479 	}
480 
481 	dsb(ish);
482 	isb();
483 	mm_ops->put_page(ptep);
484 
485 	if (childp)
486 		mm_ops->put_page(childp);
487 
488 	return 0;
489 }
490 
491 u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
492 {
493 	struct hyp_unmap_data unmap_data = {
494 		.mm_ops	= pgt->mm_ops,
495 	};
496 	struct kvm_pgtable_walker walker = {
497 		.cb	= hyp_unmap_walker,
498 		.arg	= &unmap_data,
499 		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
500 	};
501 
502 	if (!pgt->mm_ops->page_count)
503 		return 0;
504 
505 	kvm_pgtable_walk(pgt, addr, size, &walker);
506 	return unmap_data.unmapped;
507 }
508 
509 int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
510 			 struct kvm_pgtable_mm_ops *mm_ops)
511 {
512 	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);
513 
514 	pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
515 	if (!pgt->pgd)
516 		return -ENOMEM;
517 
518 	pgt->ia_bits		= va_bits;
519 	pgt->start_level	= KVM_PGTABLE_MAX_LEVELS - levels;
520 	pgt->mm_ops		= mm_ops;
521 	pgt->mmu		= NULL;
522 	pgt->force_pte_cb	= NULL;
523 
524 	return 0;
525 }
526 
527 static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
528 			   enum kvm_pgtable_walk_flags flag, void * const arg)
529 {
530 	struct kvm_pgtable_mm_ops *mm_ops = arg;
531 	kvm_pte_t pte = *ptep;
532 
533 	if (!kvm_pte_valid(pte))
534 		return 0;
535 
536 	mm_ops->put_page(ptep);
537 
538 	if (kvm_pte_table(pte, level))
539 		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
540 
541 	return 0;
542 }
543 
544 void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
545 {
546 	struct kvm_pgtable_walker walker = {
547 		.cb	= hyp_free_walker,
548 		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
549 		.arg	= pgt->mm_ops,
550 	};
551 
552 	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
553 	pgt->mm_ops->put_page(pgt->pgd);
554 	pgt->pgd = NULL;
555 }
556 
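/*
 * Lifecycle summary for the hyp API above (an editor-added sketch, not a
 * verbatim caller): a table is initialised once, populated and pruned with
 * map/unmap, and torn down when nothing references it any more:
 *
 *	struct kvm_pgtable pgt;
 *
 *	ret = kvm_pgtable_hyp_init(&pgt, hyp_va_bits, mm_ops);
 *	ret = kvm_pgtable_hyp_map(&pgt, va, size, phys, prot);
 *	...
 *	kvm_pgtable_hyp_unmap(&pgt, va, size);
 *	kvm_pgtable_hyp_destroy(&pgt);
 */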
557 struct stage2_map_data {
558 	u64				phys;
559 	kvm_pte_t			attr;
560 	u64				annotation;
561 
562 	kvm_pte_t			*anchor;
563 	kvm_pte_t			*childp;
564 
565 	struct kvm_s2_mmu		*mmu;
566 	void				*memcache;
567 
568 	struct kvm_pgtable_mm_ops	*mm_ops;
569 
570 	/* Force mappings to page granularity */
571 	bool				force_pte;
572 };
573 
574 u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
575 {
576 	u64 vtcr = VTCR_EL2_FLAGS;
577 	u8 lvls;
578 
579 	vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
580 	vtcr |= VTCR_EL2_T0SZ(phys_shift);
581 	/*
582 	 * Use a minimum of two page-table levels to prevent splitting
583 	 * host PMD huge pages at stage2.
584 	 */
585 	lvls = stage2_pgtable_levels(phys_shift);
586 	if (lvls < 2)
587 		lvls = 2;
588 	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
589 
590 	/*
591 	 * Enable the Hardware Access Flag management, unconditionally
592 	 * on all CPUs. The feature is RES0 on CPUs that lack support for it
593 	 * and must be ignored by those CPUs.
594 	 */
595 	vtcr |= VTCR_EL2_HA;
596 
597 	/* Set the vmid bits */
598 	vtcr |= (get_vmid_bits(mmfr1) == 16) ?
599 		VTCR_EL2_VS_16BIT :
600 		VTCR_EL2_VS_8BIT;
601 
602 	return vtcr;
603 }
604 
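/*
 * Rough worked example (editor-added; exact values depend on the header
 * definitions): for phys_shift = 40 with 4KiB pages, VTCR_EL2_T0SZ(40)
 * encodes T0SZ = 24, stage2_pgtable_levels(40) typically resolves to a
 * 3-level walk thanks to the concatenated top-level tables (see
 * kvm_pgd_pages()), and VS selects 16-bit VMIDs only when ID_AA64MMFR1_EL1
 * advertises them.
 */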
605 static bool stage2_has_fwb(struct kvm_pgtable *pgt)
606 {
607 	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
608 		return false;
609 
610 	return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
611 }
612 
613 #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
614 
615 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
616 				kvm_pte_t *ptep)
617 {
618 	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
619 	kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) :
620 			    KVM_S2_MEMATTR(pgt, NORMAL);
621 	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
622 
623 	if (!(prot & KVM_PGTABLE_PROT_X))
624 		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
625 	else if (device)
626 		return -EINVAL;
627 
628 	if (prot & KVM_PGTABLE_PROT_R)
629 		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
630 
631 	if (prot & KVM_PGTABLE_PROT_W)
632 		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
633 
634 	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
635 	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
636 	attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW;
637 	*ptep = attr;
638 
639 	return 0;
640 }
641 
642 enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
643 {
644 	enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW;
645 
646 	if (!kvm_pte_valid(pte))
647 		return prot;
648 
649 	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R)
650 		prot |= KVM_PGTABLE_PROT_R;
651 	if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
652 		prot |= KVM_PGTABLE_PROT_W;
653 	if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
654 		prot |= KVM_PGTABLE_PROT_X;
655 
656 	return prot;
657 }
658 
659 static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
660 {
661 	if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
662 		return true;
663 
664 	return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS));
665 }
666 
667 static bool stage2_pte_is_counted(kvm_pte_t pte)
668 {
669 	/*
670 	 * The refcount tracks valid entries as well as invalid entries if they
671 	 * encode ownership of a page by an entity other than the page-table
672 	 * owner, whose id is 0.
673 	 */
674 	return !!pte;
675 }
676 
677 static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
678 			   u32 level, struct kvm_pgtable_mm_ops *mm_ops)
679 {
680 	/*
681 	 * Clear the existing PTE, and perform break-before-make with
682 	 * TLB maintenance if it was valid.
683 	 */
684 	if (kvm_pte_valid(*ptep)) {
685 		kvm_clear_pte(ptep);
686 		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
687 	}
688 
689 	mm_ops->put_page(ptep);
690 }
691 
692 static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
693 {
694 	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
695 	return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL);
696 }
697 
698 static bool stage2_pte_executable(kvm_pte_t pte)
699 {
700 	return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
701 }
702 
703 static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
704 					struct stage2_map_data *data)
705 {
706 	if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1)))
707 		return false;
708 
709 	return kvm_block_mapping_supported(addr, end, data->phys, level);
710 }
711 
712 static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
713 				      kvm_pte_t *ptep,
714 				      struct stage2_map_data *data)
715 {
716 	kvm_pte_t new, old = *ptep;
717 	u64 granule = kvm_granule_size(level), phys = data->phys;
718 	struct kvm_pgtable *pgt = data->mmu->pgt;
719 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
720 
721 	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
722 		return -E2BIG;
723 
724 	if (kvm_phys_is_valid(phys))
725 		new = kvm_init_valid_leaf_pte(phys, data->attr, level);
726 	else
727 		new = data->annotation;
728 
729 	if (stage2_pte_is_counted(old)) {
730 		/*
731 		 * Skip updating the PTE if we are trying to recreate the exact
732 		 * same mapping or only change the access permissions. Instead,
733 		 * the vCPU will take one more exit from the guest if still needed
734 		 * and then go through the path of relaxing permissions.
735 		 */
736 		if (!stage2_pte_needs_update(old, new))
737 			return -EAGAIN;
738 
739 		/*
740 		 * If we're only changing software bits, then we don't need to
741 		 * do anything else.
742 		 */
743 		if (!((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
744 			goto out_set_pte;
745 
746 		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
747 	}
748 
749 	/* Perform CMOs before installation of the guest stage-2 PTE */
750 	if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
751 		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
752 					       granule);
753 	if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
754 		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
755 
756 	if (stage2_pte_is_counted(new))
757 		mm_ops->get_page(ptep);
758 
759 out_set_pte:
760 	smp_store_release(ptep, new);
761 	if (kvm_phys_is_valid(phys))
762 		data->phys += granule;
763 	return 0;
764 }
765 
766 static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
767 				     kvm_pte_t *ptep,
768 				     struct stage2_map_data *data)
769 {
770 	if (data->anchor)
771 		return 0;
772 
773 	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
774 		return 0;
775 
776 	data->childp = kvm_pte_follow(*ptep, data->mm_ops);
777 	kvm_clear_pte(ptep);
778 
779 	/*
780 	 * Invalidate the whole stage-2, as we may have numerous leaf
781 	 * entries below us which would otherwise need invalidating
782 	 * individually.
783 	 */
784 	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
785 	data->anchor = ptep;
786 	return 0;
787 }
788 
789 static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
790 				struct stage2_map_data *data)
791 {
792 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
793 	kvm_pte_t *childp, pte = *ptep;
794 	int ret;
795 
796 	if (data->anchor) {
797 		if (stage2_pte_is_counted(pte))
798 			mm_ops->put_page(ptep);
799 
800 		return 0;
801 	}
802 
803 	ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data);
804 	if (ret != -E2BIG)
805 		return ret;
806 
807 	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
808 		return -EINVAL;
809 
810 	if (!data->memcache)
811 		return -ENOMEM;
812 
813 	childp = mm_ops->zalloc_page(data->memcache);
814 	if (!childp)
815 		return -ENOMEM;
816 
817 	/*
818 	 * If we've run into an existing block mapping then replace it with
819 	 * a table. Accesses beyond 'end' that fall within the new table
820 	 * will be mapped lazily.
821 	 */
822 	if (stage2_pte_is_counted(pte))
823 		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
824 
825 	kvm_set_table_pte(ptep, childp, mm_ops);
826 	mm_ops->get_page(ptep);
827 
828 	return 0;
829 }
830 
831 static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
832 				      kvm_pte_t *ptep,
833 				      struct stage2_map_data *data)
834 {
835 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
836 	kvm_pte_t *childp;
837 	int ret = 0;
838 
839 	if (!data->anchor)
840 		return 0;
841 
842 	if (data->anchor == ptep) {
843 		childp = data->childp;
844 		data->anchor = NULL;
845 		data->childp = NULL;
846 		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
847 	} else {
848 		childp = kvm_pte_follow(*ptep, mm_ops);
849 	}
850 
851 	mm_ops->put_page(childp);
852 	mm_ops->put_page(ptep);
853 
854 	return ret;
855 }
856 
857 /*
858  * This is a little fiddly, as we use all three of the walk flags. The idea
859  * is that the TABLE_PRE callback runs for table entries on the way down,
860  * looking for table entries which we could conceivably replace with a
861  * block entry for this mapping. If it finds one, then it sets the 'anchor'
862  * field in 'struct stage2_map_data' to point at the table entry, before
863  * clearing the entry to zero and descending into the now detached table.
864  *
865  * The behaviour of the LEAF callback then depends on whether or not the
866  * anchor has been set. If not, then we're not using a block mapping higher
867  * up the table and we perform the mapping at the existing leaves instead.
868  * If, on the other hand, the anchor _is_ set, then we drop references to
869  * all valid leaves so that the pages beneath the anchor can be freed.
870  *
871  * Finally, the TABLE_POST callback does nothing if the anchor has not
872  * been set, but otherwise frees the page-table pages while walking back up
873  * the page-table, installing the block entry when it revisits the anchor
874  * pointer and clearing the anchor to NULL.
875  */
876 static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
877 			     enum kvm_pgtable_walk_flags flag, void * const arg)
878 {
879 	struct stage2_map_data *data = arg;
880 
881 	switch (flag) {
882 	case KVM_PGTABLE_WALK_TABLE_PRE:
883 		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
884 	case KVM_PGTABLE_WALK_LEAF:
885 		return stage2_map_walk_leaf(addr, end, level, ptep, data);
886 	case KVM_PGTABLE_WALK_TABLE_POST:
887 		return stage2_map_walk_table_post(addr, end, level, ptep, data);
888 	}
889 
890 	return -EINVAL;
891 }
892 
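/*
 * Worked trace of the anchor mechanism described above (editor-added,
 * assuming a 4KiB granule): a 2MiB-aligned, 2MiB map request that hits an
 * existing level-2 table entry proceeds as
 *
 *	TABLE_PRE  (level 2): the block mapping is allowed here, so the table
 *			      PTE is zapped, the stage-2 TLB entries are
 *			      invalidated and data->anchor is set.
 *	LEAF       (level 3): with the anchor set, each counted PTE in the
 *			      detached table only has its reference dropped.
 *	TABLE_POST (level 2): the walker revisits the anchor, installs the
 *			      2MiB block via stage2_map_walk_leaf() and drops
 *			      the remaining references on the detached table.
 */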
893 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
894 			   u64 phys, enum kvm_pgtable_prot prot,
895 			   void *mc)
896 {
897 	int ret;
898 	struct stage2_map_data map_data = {
899 		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
900 		.mmu		= pgt->mmu,
901 		.memcache	= mc,
902 		.mm_ops		= pgt->mm_ops,
903 		.force_pte	= pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot),
904 	};
905 	struct kvm_pgtable_walker walker = {
906 		.cb		= stage2_map_walker,
907 		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
908 				  KVM_PGTABLE_WALK_LEAF |
909 				  KVM_PGTABLE_WALK_TABLE_POST,
910 		.arg		= &map_data,
911 	};
912 
913 	if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys)))
914 		return -EINVAL;
915 
916 	ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
917 	if (ret)
918 		return ret;
919 
920 	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
921 	dsb(ishst);
922 	return ret;
923 }
924 
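/*
 * Illustrative call site (editor-added sketch; fault_ipa, pfn_phys and
 * memcache are placeholders for the caller's state): the typical user is the
 * stage-2 fault handler, which tops up a memory cache so table pages can be
 * allocated without sleeping and then does roughly
 *
 *	ret = kvm_pgtable_stage2_map(pgt, fault_ipa, PAGE_SIZE, pfn_phys,
 *				     KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W,
 *				     &memcache);
 *
 * A return of -EAGAIN means the existing valid PTE differs only in
 * permissions, which is left to the permission-relax path (see
 * stage2_map_walker_try_leaf()).
 */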
925 int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size,
926 				void *mc, kvm_pte_t annotation)
927 {
928 	int ret;
929 	struct stage2_map_data map_data = {
930 		.phys		= KVM_PHYS_INVALID,
931 		.mmu		= pgt->mmu,
932 		.memcache	= mc,
933 		.mm_ops		= pgt->mm_ops,
934 		.force_pte	= true,
935 		.annotation	= annotation,
936 	};
937 	struct kvm_pgtable_walker walker = {
938 		.cb		= stage2_map_walker,
939 		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
940 				  KVM_PGTABLE_WALK_LEAF |
941 				  KVM_PGTABLE_WALK_TABLE_POST,
942 		.arg		= &map_data,
943 	};
944 
945 	if (annotation & PTE_VALID)
946 		return -EINVAL;
947 
948 	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
949 	return ret;
950 }
951 
952 static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
953 			       enum kvm_pgtable_walk_flags flag,
954 			       void * const arg)
955 {
956 	struct kvm_pgtable *pgt = arg;
957 	struct kvm_s2_mmu *mmu = pgt->mmu;
958 	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
959 	kvm_pte_t pte = *ptep, *childp = NULL;
960 	bool need_flush = false;
961 
962 	if (!kvm_pte_valid(pte)) {
963 		if (stage2_pte_is_counted(pte)) {
964 			kvm_clear_pte(ptep);
965 			mm_ops->put_page(ptep);
966 		}
967 		return 0;
968 	}
969 
970 	if (kvm_pte_table(pte, level)) {
971 		childp = kvm_pte_follow(pte, mm_ops);
972 
973 		if (mm_ops->page_count(childp) != 1)
974 			return 0;
975 	} else if (stage2_pte_cacheable(pgt, pte)) {
976 		need_flush = !stage2_has_fwb(pgt);
977 	}
978 
979 	/*
980 	 * This is similar to the map() path in that we unmap the entire
981 	 * block entry and rely on the remaining portions being faulted
982 	 * back lazily.
983 	 */
984 	stage2_put_pte(ptep, mmu, addr, level, mm_ops);
985 
986 	if (need_flush && mm_ops->dcache_clean_inval_poc)
987 		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
988 					       kvm_granule_size(level));
989 
990 	if (childp)
991 		mm_ops->put_page(childp);
992 
993 	return 0;
994 }
995 
996 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
997 {
998 	struct kvm_pgtable_walker walker = {
999 		.cb	= stage2_unmap_walker,
1000 		.arg	= pgt,
1001 		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
1002 	};
1003 
1004 	return kvm_pgtable_walk(pgt, addr, size, &walker);
1005 }
1006 
1007 struct stage2_attr_data {
1008 	kvm_pte_t			attr_set;
1009 	kvm_pte_t			attr_clr;
1010 	kvm_pte_t			pte;
1011 	u32				level;
1012 	struct kvm_pgtable_mm_ops	*mm_ops;
1013 };
1014 
1015 static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
1016 			      enum kvm_pgtable_walk_flags flag,
1017 			      void * const arg)
1018 {
1019 	kvm_pte_t pte = *ptep;
1020 	struct stage2_attr_data *data = arg;
1021 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
1022 
1023 	if (!kvm_pte_valid(pte))
1024 		return 0;
1025 
1026 	data->level = level;
1027 	data->pte = pte;
1028 	pte &= ~data->attr_clr;
1029 	pte |= data->attr_set;
1030 
1031 	/*
1032 	 * We may race with the CPU trying to set the access flag here,
1033 	 * but worst-case the access flag update gets lost and will be
1034 	 * set on the next access instead.
1035 	 */
1036 	if (data->pte != pte) {
1037 		/*
1038 		 * Invalidate instruction cache before updating the guest
1039 		 * stage-2 PTE if we are going to add executable permission.
1040 		 */
1041 		if (mm_ops->icache_inval_pou &&
1042 		    stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
1043 			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
1044 						  kvm_granule_size(level));
1045 		WRITE_ONCE(*ptep, pte);
1046 	}
1047 
1048 	return 0;
1049 }
1050 
1051 static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
1052 				    u64 size, kvm_pte_t attr_set,
1053 				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
1054 				    u32 *level)
1055 {
1056 	int ret;
1057 	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
1058 	struct stage2_attr_data data = {
1059 		.attr_set	= attr_set & attr_mask,
1060 		.attr_clr	= attr_clr & attr_mask,
1061 		.mm_ops		= pgt->mm_ops,
1062 	};
1063 	struct kvm_pgtable_walker walker = {
1064 		.cb		= stage2_attr_walker,
1065 		.arg		= &data,
1066 		.flags		= KVM_PGTABLE_WALK_LEAF,
1067 	};
1068 
1069 	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
1070 	if (ret)
1071 		return ret;
1072 
1073 	if (orig_pte)
1074 		*orig_pte = data.pte;
1075 
1076 	if (level)
1077 		*level = data.level;
1078 	return 0;
1079 }
1080 
1081 int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
1082 {
1083 	return stage2_update_leaf_attrs(pgt, addr, size, 0,
1084 					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
1085 					NULL, NULL);
1086 }
1087 
1088 kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
1089 {
1090 	kvm_pte_t pte = 0;
1091 	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
1092 				 &pte, NULL);
1093 	dsb(ishst);
1094 	return pte;
1095 }
1096 
1097 kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
1098 {
1099 	kvm_pte_t pte = 0;
1100 	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
1101 				 &pte, NULL);
1102 	/*
1103 	 * "But where's the TLBI?!", you scream.
1104 	 * "Over in the core code", I sigh.
1105 	 *
1106 	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
1107 	 */
1108 	return pte;
1109 }
1110 
1111 bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
1112 {
1113 	kvm_pte_t pte = 0;
1114 	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
1115 	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
1116 }
1117 
1118 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
1119 				   enum kvm_pgtable_prot prot)
1120 {
1121 	int ret;
1122 	u32 level;
1123 	kvm_pte_t set = 0, clr = 0;
1124 
1125 	if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
1126 		return -EINVAL;
1127 
1128 	if (prot & KVM_PGTABLE_PROT_R)
1129 		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
1130 
1131 	if (prot & KVM_PGTABLE_PROT_W)
1132 		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
1133 
1134 	if (prot & KVM_PGTABLE_PROT_X)
1135 		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
1136 
1137 	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
1138 	if (!ret)
1139 		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
1140 	return ret;
1141 }
1142 
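/*
 * Illustrative usage (editor-added; fault_ipa is a placeholder): after a
 * permission fault on an already-mapped page, the handler can upgrade the
 * permissions in place, e.g.
 *
 *	ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa,
 *					     KVM_PGTABLE_PROT_R |
 *					     KVM_PGTABLE_PROT_W);
 *
 * Permissions are only ever added here, never removed, which is why no
 * break-before-make sequence is required.
 */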
1143 static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
1144 			       enum kvm_pgtable_walk_flags flag,
1145 			       void * const arg)
1146 {
1147 	struct kvm_pgtable *pgt = arg;
1148 	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
1149 	kvm_pte_t pte = *ptep;
1150 
1151 	if (!stage2_pte_cacheable(pgt, pte))
1152 		return 0;
1153 
1154 	if (mm_ops->dcache_clean_inval_poc)
1155 		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
1156 					       kvm_granule_size(level));
1157 	return 0;
1158 }
1159 
1160 int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
1161 {
1162 	struct kvm_pgtable_walker walker = {
1163 		.cb	= stage2_flush_walker,
1164 		.flags	= KVM_PGTABLE_WALK_LEAF,
1165 		.arg	= pgt,
1166 	};
1167 
1168 	if (stage2_has_fwb(pgt))
1169 		return 0;
1170 
1171 	return kvm_pgtable_walk(pgt, addr, size, &walker);
1172 }
1173 
1174 
1175 int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
1176 			      struct kvm_pgtable_mm_ops *mm_ops,
1177 			      enum kvm_pgtable_stage2_flags flags,
1178 			      kvm_pgtable_force_pte_cb_t force_pte_cb)
1179 {
1180 	size_t pgd_sz;
1181 	u64 vtcr = mmu->arch->vtcr;
1182 	u32 ia_bits = VTCR_EL2_IPA(vtcr);
1183 	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
1184 	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
1185 
1186 	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
1187 	pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz);
1188 	if (!pgt->pgd)
1189 		return -ENOMEM;
1190 
1191 	pgt->ia_bits		= ia_bits;
1192 	pgt->start_level	= start_level;
1193 	pgt->mm_ops		= mm_ops;
1194 	pgt->mmu		= mmu;
1195 	pgt->flags		= flags;
1196 	pgt->force_pte_cb	= force_pte_cb;
1197 
1198 	/* Ensure zeroed PGD pages are visible to the hardware walker */
1199 	dsb(ishst);
1200 	return 0;
1201 }
1202 
1203 size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
1204 {
1205 	u32 ia_bits = VTCR_EL2_IPA(vtcr);
1206 	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
1207 	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
1208 
1209 	return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
1210 }
1211 
1212 static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
1213 			      enum kvm_pgtable_walk_flags flag,
1214 			      void * const arg)
1215 {
1216 	struct kvm_pgtable_mm_ops *mm_ops = arg;
1217 	kvm_pte_t pte = *ptep;
1218 
1219 	if (!stage2_pte_is_counted(pte))
1220 		return 0;
1221 
1222 	mm_ops->put_page(ptep);
1223 
1224 	if (kvm_pte_table(pte, level))
1225 		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
1226 
1227 	return 0;
1228 }
1229 
1230 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
1231 {
1232 	size_t pgd_sz;
1233 	struct kvm_pgtable_walker walker = {
1234 		.cb	= stage2_free_walker,
1235 		.flags	= KVM_PGTABLE_WALK_LEAF |
1236 			  KVM_PGTABLE_WALK_TABLE_POST,
1237 		.arg	= pgt->mm_ops,
1238 	};
1239 
1240 	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
1241 	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
1242 	pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
1243 	pgt->pgd = NULL;
1244 }
1245