// SPDX-License-Identifier: GPL-2.0-only
/*
 * Stand-alone page-table allocator for hyp stage-1 and guest stage-2.
 * No bombay mix was harmed in the writing of this file.
 *
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#include <linux/bitfield.h>
#include <asm/kvm_pgtable.h>

#define KVM_PGTABLE_MAX_LEVELS		4U

#define KVM_PTE_VALID			BIT(0)

#define KVM_PTE_TYPE			BIT(1)
#define KVM_PTE_TYPE_BLOCK		0
#define KVM_PTE_TYPE_PAGE		1
#define KVM_PTE_TYPE_TABLE		1

#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)

#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW	1
#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 51)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)

struct kvm_pgtable_walk_data {
	struct kvm_pgtable		*pgt;
	struct kvm_pgtable_walker	*walker;

	u64				addr;
	u64				end;
};

static u64 kvm_granule_shift(u32 level)
{
	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}

static u64 kvm_granule_size(u32 level)
{
	return BIT(kvm_granule_shift(level));
}

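/*
 * Worked example, assuming 4KB pages (PAGE_SHIFT == 12): kvm_granule_shift()
 * gives 39/30/21/12 for levels 0-3, i.e. granules of 512GB, 1GB, 2MB and 4KB.
 * The check below therefore permits 1GB and 2MB block mappings (plus 4KB
 * pages at the final level); with 16KB or 64KB pages only level-2 blocks
 * (32MB and 512MB respectively) are allowed.
 */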
static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
{
	u64 granule = kvm_granule_size(level);

	/*
	 * Reject invalid block mappings and don't bother with 4TB mappings for
	 * 52-bit PAs.
	 */
	if (level == 0 || (PAGE_SIZE != SZ_4K && level == 1))
		return false;

	if (granule > (end - addr))
		return false;

	return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
}

static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
{
	u64 shift = kvm_granule_shift(level);
	u64 mask = BIT(PAGE_SHIFT - 3) - 1;

	return (data->addr >> shift) & mask;
}

static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
{
	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
	u64 mask = BIT(pgt->ia_bits) - 1;

	return (addr & mask) >> shift;
}

static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
{
	return __kvm_pgd_page_idx(data->pgt, data->addr);
}

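/*
 * Example: a stage-2 table with a 40-bit IPA space and 4KB pages starting
 * at level 1 concatenates two level-1 tables, so __kvm_pgd_page_idx() of
 * the last address is 1 and kvm_pgd_pages() returns 2 pages for the PGD.
 */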
static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
{
	struct kvm_pgtable pgt = {
		.ia_bits	= ia_bits,
		.start_level	= start_level,
	};

	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
}

static bool kvm_pte_valid(kvm_pte_t pte)
{
	return pte & KVM_PTE_VALID;
}

static bool kvm_pte_table(kvm_pte_t pte, u32 level)
{
	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
		return false;

	if (!kvm_pte_valid(pte))
		return false;

	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
}

static u64 kvm_pte_to_phys(kvm_pte_t pte)
{
	u64 pa = pte & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;

	return pa;
}

static kvm_pte_t kvm_phys_to_pte(u64 pa)
{
	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;

	if (PAGE_SHIFT == 16)
		pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);

	return pte;
}

static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte)
{
	return __va(kvm_pte_to_phys(pte));
}

static void kvm_set_invalid_pte(kvm_pte_t *ptep)
{
	kvm_pte_t pte = *ptep;
	WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID);
}

static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(__pa(childp));

	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
	pte |= KVM_PTE_VALID;

	WARN_ON(kvm_pte_valid(old));
	smp_store_release(ptep, pte);
}

static bool kvm_set_valid_leaf_pte(kvm_pte_t *ptep, u64 pa, kvm_pte_t attr,
				   u32 level)
{
	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(pa);
	u64 type = (level == KVM_PGTABLE_MAX_LEVELS - 1) ? KVM_PTE_TYPE_PAGE :
							   KVM_PTE_TYPE_BLOCK;

	pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI);
	pte |= FIELD_PREP(KVM_PTE_TYPE, type);
	pte |= KVM_PTE_VALID;

	/* Tolerate KVM recreating the exact same mapping. */
	if (kvm_pte_valid(old))
		return old == pte;

	smp_store_release(ptep, pte);
	return true;
}

static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
				  u32 level, kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flag)
{
	struct kvm_pgtable_walker *walker = data->walker;
	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level);

static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
				      kvm_pte_t *ptep, u32 level)
{
	int ret = 0;
	u64 addr = data->addr;
	kvm_pte_t *childp, pte = *ptep;
	bool table = kvm_pte_table(pte, level);
	enum kvm_pgtable_walk_flags flags = data->walker->flags;

	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_PRE);
	}

	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_LEAF);
		pte = *ptep;
		table = kvm_pte_table(pte, level);
	}

	if (ret)
		goto out;

	if (!table) {
		data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
		data->addr += kvm_granule_size(level);
		goto out;
	}

	childp = kvm_pte_follow(pte);
	ret = __kvm_pgtable_walk(data, childp, level + 1);
	if (ret)
		goto out;

	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
					     KVM_PGTABLE_WALK_TABLE_POST);
	}

out:
	return ret;
}

static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
			      kvm_pte_t *pgtable, u32 level)
{
	u32 idx;
	int ret = 0;

	if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS))
		return -EINVAL;

	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
		kvm_pte_t *ptep = &pgtable[idx];

		if (data->addr >= data->end)
			break;

		ret = __kvm_pgtable_visit(data, ptep, level);
		if (ret)
			break;
	}

	return ret;
}

static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
{
	u32 idx;
	int ret = 0;
	struct kvm_pgtable *pgt = data->pgt;
	u64 limit = BIT(pgt->ia_bits);

	if (data->addr > limit || data->end > limit)
		return -ERANGE;

	if (!pgt->pgd)
		return -EINVAL;

	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];

		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
		if (ret)
			break;
	}

	return ret;
}

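/*
 * Illustrative use of the walker API (a hypothetical caller, with a counter
 * passed via 'arg'): a LEAF-only walker whose callback does
 *
 *	static int count_leaf_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 *				 enum kvm_pgtable_walk_flags flag,
 *				 void * const arg)
 *	{
 *		if (kvm_pte_valid(*ptep))
 *			(*(u64 *)arg)++;
 *		return 0;
 *	}
 *
 * registered with .flags = KVM_PGTABLE_WALK_LEAF counts the valid leaf
 * entries in [addr, addr + size) when handed to kvm_pgtable_walk().
 */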
int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker)
{
	struct kvm_pgtable_walk_data walk_data = {
		.pgt	= pgt,
		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
		.end	= PAGE_ALIGN(walk_data.addr + size),
		.walker	= walker,
	};

	return _kvm_pgtable_walk(&walk_data);
}

struct hyp_map_data {
	u64		phys;
	kvm_pte_t	attr;
};

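/*
 * For example, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W on normal memory
 * yields a stage-1 attribute with AP = read/write, inner-shareable SH, the
 * access flag set and XN set (writable mappings are never executable at
 * EL2); requesting both PROT_W and PROT_X, or PROT_X on a device mapping,
 * is rejected with -EINVAL.
 */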
static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
				 struct hyp_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
	u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW :
					       KVM_PTE_LEAF_ATTR_LO_S1_AP_RO;

	if (!(prot & KVM_PGTABLE_PROT_R))
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_X) {
		if (prot & KVM_PGTABLE_PROT_W)
			return -EINVAL;

		if (device)
			return -EINVAL;
	} else {
		attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN;
	}

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
	data->attr = attr;
	return 0;
}

static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				    kvm_pte_t *ptep, struct hyp_map_data *data)
{
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	WARN_ON(!kvm_set_valid_leaf_pte(ptep, phys, data->attr, level));
	data->phys += granule;
	return true;
}

static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			  enum kvm_pgtable_walk_flags flag, void * const arg)
{
	kvm_pte_t *childp;

	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
		return 0;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!childp)
		return -ENOMEM;

	kvm_set_table_pte(ptep, childp);
	return 0;
}

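/*
 * Illustrative call (hypothetical hyp_va/text_pa/text_size values): mapping
 * the hypervisor text read-only and executable would look like
 *
 *	kvm_pgtable_hyp_map(pgt, hyp_va, text_size, text_pa,
 *			    KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X);
 */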
int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot)
{
	int ret;
	struct hyp_map_data map_data = {
		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
	};
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_map_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &map_data,
	};

	ret = hyp_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	isb();
	return ret;
}

int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
{
	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);

	pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= va_bits;
	pgt->start_level	= KVM_PGTABLE_MAX_LEVELS - levels;
	pgt->mmu		= NULL;
	return 0;
}

static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			   enum kvm_pgtable_walk_flags flag, void * const arg)
{
	free_page((unsigned long)kvm_pte_follow(*ptep));
	return 0;
}

void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
{
	struct kvm_pgtable_walker walker = {
		.cb	= hyp_free_walker,
		.flags	= KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	free_page((unsigned long)pgt->pgd);
	pgt->pgd = NULL;
}

struct stage2_map_data {
	u64				phys;
	kvm_pte_t			attr;

	kvm_pte_t			*anchor;

	struct kvm_s2_mmu		*mmu;
	struct kvm_mmu_memory_cache	*memcache;
};

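/*
 * For example, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W | KVM_PGTABLE_PROT_X
 * on normal memory produces a stage-2 attribute with S2AP read and write
 * permissions, XN clear, inner-shareable SH and the access flag set, while
 * PROT_X combined with a device mapping is rejected with -EINVAL.
 */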
static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
				    struct stage2_map_data *data)
{
	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
	kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
			    PAGE_S2_MEMATTR(NORMAL);
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;

	if (!(prot & KVM_PGTABLE_PROT_X))
		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
	else if (device)
		return -EINVAL;

	if (prot & KVM_PGTABLE_PROT_R)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
	attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
	data->attr = attr;
	return 0;
}

static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
				       kvm_pte_t *ptep,
				       struct stage2_map_data *data)
{
	u64 granule = kvm_granule_size(level), phys = data->phys;

	if (!kvm_block_mapping_supported(addr, end, phys, level))
		return false;

	/*
	 * If the PTE was already valid, drop the refcount on the table
	 * early, as it will be bumped-up again in stage2_map_walk_leaf().
	 * This ensures that the refcount stays constant across a valid to
	 * valid PTE update.
	 */
	if (kvm_pte_valid(*ptep))
		put_page(virt_to_page(ptep));

	if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
		goto out;

	/* There's an existing valid leaf entry, so perform break-before-make */
	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
	kvm_set_valid_leaf_pte(ptep, phys, data->attr, level);
out:
	data->phys += granule;
	return true;
}

static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
				     kvm_pte_t *ptep,
				     struct stage2_map_data *data)
{
	if (data->anchor)
		return 0;

	if (!kvm_block_mapping_supported(addr, end, data->phys, level))
		return 0;

	kvm_set_invalid_pte(ptep);

	/*
	 * Invalidate the whole stage-2, as we may have numerous leaf
	 * entries below us which would otherwise need invalidating
	 * individually.
	 */
	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
	data->anchor = ptep;
	return 0;
}

static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
				struct stage2_map_data *data)
{
	kvm_pte_t *childp, pte = *ptep;
	struct page *page = virt_to_page(ptep);

	if (data->anchor) {
		if (kvm_pte_valid(pte))
			put_page(page);

		return 0;
	}

	if (stage2_map_walker_try_leaf(addr, end, level, ptep, data))
		goto out_get_page;

	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
		return -EINVAL;

	if (!data->memcache)
		return -ENOMEM;

	childp = kvm_mmu_memory_cache_alloc(data->memcache);
	if (!childp)
		return -ENOMEM;

	/*
	 * If we've run into an existing block mapping then replace it with
	 * a table. Accesses beyond 'end' that fall within the new table
	 * will be mapped lazily.
	 */
	if (kvm_pte_valid(pte)) {
		kvm_set_invalid_pte(ptep);
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
		put_page(page);
	}

	kvm_set_table_pte(ptep, childp);

out_get_page:
	get_page(page);
	return 0;
}

static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
				      kvm_pte_t *ptep,
				      struct stage2_map_data *data)
{
	int ret = 0;

	if (!data->anchor)
		return 0;

	free_page((unsigned long)kvm_pte_follow(*ptep));
	put_page(virt_to_page(ptep));

	if (data->anchor == ptep) {
		data->anchor = NULL;
		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
	}

	return ret;
}

/*
 * This is a little fiddly, as we use all three of the walk flags. The idea
 * is that the TABLE_PRE callback runs for table entries on the way down,
 * looking for table entries which we could conceivably replace with a
 * block entry for this mapping. If it finds one, then it sets the 'anchor'
 * field in 'struct stage2_map_data' to point at the table entry, before
 * clearing the entry to zero and descending into the now detached table.
 *
 * The behaviour of the LEAF callback then depends on whether or not the
 * anchor has been set. If not, then we're not using a block mapping higher
 * up the table and we perform the mapping at the existing leaves instead.
 * If, on the other hand, the anchor _is_ set, then we drop references to
 * all valid leaves so that the pages beneath the anchor can be freed.
 *
 * Finally, the TABLE_POST callback does nothing if the anchor has not
 * been set, but otherwise frees the page-table pages while walking back up
 * the page-table, installing the block entry when it revisits the anchor
 * pointer and clearing the anchor to NULL.
 */
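/*
 * Concrete example (4KB pages): mapping a 2MB-aligned, 2MB-sized region over
 * an area currently mapped with 4KB pages sees TABLE_PRE fire at the level-2
 * table entry, which becomes the anchor and is invalidated; the LEAF
 * callbacks then only drop references on the detached level-3 entries, and
 * TABLE_POST frees the level-3 table and installs the 2MB block when it
 * returns to the anchor.
 */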
static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			     enum kvm_pgtable_walk_flags flag, void * const arg)
{
	struct stage2_map_data *data = arg;

	switch (flag) {
	case KVM_PGTABLE_WALK_TABLE_PRE:
		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_LEAF:
		return stage2_map_walk_leaf(addr, end, level, ptep, data);
	case KVM_PGTABLE_WALK_TABLE_POST:
		return stage2_map_walk_table_post(addr, end, level, ptep, data);
	}

	return -EINVAL;
}

int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   struct kvm_mmu_memory_cache *mc)
{
	int ret;
	struct stage2_map_data map_data = {
		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
		.mmu		= pgt->mmu,
		.memcache	= mc,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_map_walker,
		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
				  KVM_PGTABLE_WALK_LEAF |
				  KVM_PGTABLE_WALK_TABLE_POST,
		.arg		= &map_data,
	};

	ret = stage2_map_set_prot_attr(prot, &map_data);
	if (ret)
		return ret;

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	dsb(ishst);
	return ret;
}

static void stage2_flush_dcache(void *addr, u64 size)
{
	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return;

	__flush_dcache_area(addr, size);
}

static bool stage2_pte_cacheable(kvm_pte_t pte)
{
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
	return memattr == PAGE_S2_MEMATTR(NORMAL);
}

static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	struct kvm_s2_mmu *mmu = arg;
	kvm_pte_t pte = *ptep, *childp = NULL;
	bool need_flush = false;

	if (!kvm_pte_valid(pte))
		return 0;

	if (kvm_pte_table(pte, level)) {
		childp = kvm_pte_follow(pte);

		if (page_count(virt_to_page(childp)) != 1)
			return 0;
	} else if (stage2_pte_cacheable(pte)) {
		need_flush = true;
	}

	/*
	 * This is similar to the map() path in that we unmap the entire
	 * block entry and rely on the remaining portions being faulted
	 * back lazily.
	 */
	kvm_set_invalid_pte(ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
	put_page(virt_to_page(ptep));

	if (need_flush) {
		stage2_flush_dcache(kvm_pte_follow(pte),
				    kvm_granule_size(level));
	}

	if (childp)
		free_page((unsigned long)childp);

	return 0;
}

int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_unmap_walker,
		.arg	= pgt->mmu,
		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

struct stage2_attr_data {
	kvm_pte_t	attr_set;
	kvm_pte_t	attr_clr;
	kvm_pte_t	pte;
	u32		level;
};

static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;
	struct stage2_attr_data *data = arg;

	if (!kvm_pte_valid(pte))
		return 0;

	data->level = level;
	data->pte = pte;
	pte &= ~data->attr_clr;
	pte |= data->attr_set;

	/*
	 * We may race with the CPU trying to set the access flag here,
	 * but worst-case the access flag update gets lost and will be
	 * set on the next access instead.
	 */
	if (data->pte != pte)
		WRITE_ONCE(*ptep, pte);

	return 0;
}

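/*
 * The wrappers below pass attribute bits to set and clear through this
 * helper; for example, write-protection clears
 * KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, while mkyoung()/mkold() set or clear
 * KVM_PTE_LEAF_ATTR_LO_S2_AF. Anything outside the leaf attribute masks is
 * silently dropped.
 */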
static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
				    u64 size, kvm_pte_t attr_set,
				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
				    u32 *level)
{
	int ret;
	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
	struct stage2_attr_data data = {
		.attr_set	= attr_set & attr_mask,
		.attr_clr	= attr_clr & attr_mask,
	};
	struct kvm_pgtable_walker walker = {
		.cb		= stage2_attr_walker,
		.arg		= &data,
		.flags		= KVM_PGTABLE_WALK_LEAF,
	};

	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
	if (ret)
		return ret;

	if (orig_pte)
		*orig_pte = data.pte;

	if (level)
		*level = data.level;
	return 0;
}

int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	return stage2_update_leaf_attrs(pgt, addr, size, 0,
					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
					NULL, NULL);
}

kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
				 &pte, NULL);
	dsb(ishst);
	return pte;
}

kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
				 &pte, NULL);
	/*
	 * "But where's the TLBI?!", you scream.
	 * "Over in the core code", I sigh.
	 *
	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
	 */
	return pte;
}

bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
{
	kvm_pte_t pte = 0;
	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
}

int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot)
{
	int ret;
	u32 level;
	kvm_pte_t set = 0, clr = 0;

	if (prot & KVM_PGTABLE_PROT_R)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

	if (prot & KVM_PGTABLE_PROT_W)
		set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;

	if (prot & KVM_PGTABLE_PROT_X)
		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
	if (!ret)
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
	return ret;
}

static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			       enum kvm_pgtable_walk_flags flag,
			       void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
		return 0;

	stage2_flush_dcache(kvm_pte_follow(pte), kvm_granule_size(level));
	return 0;
}

int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_flush_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return 0;

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
{
	size_t pgd_sz;
	u64 vtcr = kvm->arch.vtcr;
	u32 ia_bits = VTCR_EL2_IPA(vtcr);
	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;

	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
	pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!pgt->pgd)
		return -ENOMEM;

	pgt->ia_bits		= ia_bits;
	pgt->start_level	= start_level;
	pgt->mmu		= &kvm->arch.mmu;

	/* Ensure zeroed PGD pages are visible to the hardware walker */
	dsb(ishst);
	return 0;
}

static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
			      enum kvm_pgtable_walk_flags flag,
			      void * const arg)
{
	kvm_pte_t pte = *ptep;

	if (!kvm_pte_valid(pte))
		return 0;

	put_page(virt_to_page(ptep));

	if (kvm_pte_table(pte, level))
		free_page((unsigned long)kvm_pte_follow(pte));

	return 0;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	size_t pgd_sz;
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF |
			  KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
	free_pages_exact(pgt->pgd, pgd_sz);
	pgt->pgd = NULL;
}