• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2014, The Linux Foundation. All rights reserved.
4  * Debug helper to dump the current kernel pagetables of the system
5  * so that we can see what the various memory ranges are set to.
6  *
7  * Derived from x86 and arm implementation:
8  * (C) Copyright 2008 Intel Corporation
9  *
10  * Author: Arjan van de Ven <arjan@linux.intel.com>
11  */
12 #include <linux/debugfs.h>
13 #include <linux/errno.h>
14 #include <linux/fs.h>
15 #include <linux/io.h>
16 #include <linux/init.h>
17 #include <linux/mm.h>
18 #include <linux/sched.h>
19 #include <linux/seq_file.h>
20 
21 #include <asm/fixmap.h>
22 #include <asm/kasan.h>
23 #include <asm/memory.h>
24 #include <asm/pgtable.h>
25 #include <asm/pgtable-hwdef.h>
26 #include <asm/ptdump.h>
27 
28 
/*
 * Indices of the address_markers[] slots whose start address is not a
 * build-time constant and is therefore filled in by ptdump_init().
 * The order must match the leading entries of address_markers[].
 */
enum address_markers_idx {
	PAGE_OFFSET_NR = 0,	/* first entry of address_markers[] */
	PAGE_END_NR,		/* patched with PAGE_END at init */
#ifdef CONFIG_KASAN
	KASAN_START_NR,		/* patched with KASAN_SHADOW_START at init */
#endif
};
36 
37 static struct addr_marker address_markers[] = {
38 	{ PAGE_OFFSET,			"Linear Mapping start" },
39 	{ 0 /* PAGE_END */,		"Linear Mapping end" },
40 #ifdef CONFIG_KASAN
41 	{ 0 /* KASAN_SHADOW_START */,	"Kasan shadow start" },
42 	{ KASAN_SHADOW_END,		"Kasan shadow end" },
43 #endif
44 	{ MODULES_VADDR,		"Modules start" },
45 	{ MODULES_END,			"Modules end" },
46 	{ VMALLOC_START,		"vmalloc() area" },
47 	{ VMALLOC_END,			"vmalloc() end" },
48 	{ FIXADDR_START,		"Fixmap start" },
49 	{ FIXADDR_TOP,			"Fixmap end" },
50 	{ PCI_IO_START,			"PCI I/O start" },
51 	{ PCI_IO_END,			"PCI I/O end" },
52 #ifdef CONFIG_SPARSEMEM_VMEMMAP
53 	{ VMEMMAP_START,		"vmemmap start" },
54 	{ VMEMMAP_START + VMEMMAP_SIZE,	"vmemmap end" },
55 #endif
56 	{ -1,				NULL },
57 };
58 
/*
 * Formatted output to seq_file @m, skipped entirely when @m is NULL.
 * ptdump_check_wx() walks the tables with a NULL seq_file, so every print
 * in this file goes through this guard.
 */
#define pt_dump_seq_printf(m, fmt, ...)			\
do {							\
	if (m)						\
		seq_printf(m, fmt, ##__VA_ARGS__);	\
} while (0)
64 
/*
 * Emit the plain string @fmt to seq_file @m, skipped when @m is NULL.
 *
 * The text is written with seq_puts() rather than seq_printf() so that it
 * is never interpreted as a format string: a '%' in the text is printed
 * literally instead of consuming a nonexistent argument.
 */
#define pt_dump_seq_puts(m, fmt)	\
({					\
	if (m)				\
		seq_puts(m, fmt);	\
})
70 
71 /*
72  * The page dumper groups page table entries of the same type into a single
73  * description. It uses pg_state to track the range information while
74  * iterating over the pte entries. When the continuity is broken it then
75  * dumps out a description of the range.
76  */
77 struct pg_state {
78 	struct seq_file *seq;
79 	const struct addr_marker *marker;
80 	unsigned long start_address;
81 	unsigned level;
82 	u64 current_prot;
83 	bool check_wx;
84 	unsigned long wx_pages;
85 	unsigned long uxn_pages;
86 };
87 
88 struct prot_bits {
89 	u64		mask;
90 	u64		val;
91 	const char	*set;
92 	const char	*clear;
93 };
94 
95 static const struct prot_bits pte_bits[] = {
96 	{
97 		.mask	= PTE_VALID,
98 		.val	= PTE_VALID,
99 		.set	= " ",
100 		.clear	= "F",
101 	}, {
102 		.mask	= PTE_USER,
103 		.val	= PTE_USER,
104 		.set	= "USR",
105 		.clear	= "   ",
106 	}, {
107 		.mask	= PTE_RDONLY,
108 		.val	= PTE_RDONLY,
109 		.set	= "ro",
110 		.clear	= "RW",
111 	}, {
112 		.mask	= PTE_PXN,
113 		.val	= PTE_PXN,
114 		.set	= "NX",
115 		.clear	= "x ",
116 	}, {
117 		.mask	= PTE_SHARED,
118 		.val	= PTE_SHARED,
119 		.set	= "SHD",
120 		.clear	= "   ",
121 	}, {
122 		.mask	= PTE_AF,
123 		.val	= PTE_AF,
124 		.set	= "AF",
125 		.clear	= "  ",
126 	}, {
127 		.mask	= PTE_NG,
128 		.val	= PTE_NG,
129 		.set	= "NG",
130 		.clear	= "  ",
131 	}, {
132 		.mask	= PTE_CONT,
133 		.val	= PTE_CONT,
134 		.set	= "CON",
135 		.clear	= "   ",
136 	}, {
137 		.mask	= PTE_TABLE_BIT,
138 		.val	= PTE_TABLE_BIT,
139 		.set	= "   ",
140 		.clear	= "BLK",
141 	}, {
142 		.mask	= PTE_UXN,
143 		.val	= PTE_UXN,
144 		.set	= "UXN",
145 	}, {
146 		.mask	= PTE_ATTRINDX_MASK,
147 		.val	= PTE_ATTRINDX(MT_DEVICE_nGnRnE),
148 		.set	= "DEVICE/nGnRnE",
149 	}, {
150 		.mask	= PTE_ATTRINDX_MASK,
151 		.val	= PTE_ATTRINDX(MT_DEVICE_nGnRE),
152 		.set	= "DEVICE/nGnRE",
153 	}, {
154 		.mask	= PTE_ATTRINDX_MASK,
155 		.val	= PTE_ATTRINDX(MT_DEVICE_GRE),
156 		.set	= "DEVICE/GRE",
157 	}, {
158 		.mask	= PTE_ATTRINDX_MASK,
159 		.val	= PTE_ATTRINDX(MT_NORMAL_NC),
160 		.set	= "MEM/NORMAL-NC",
161 	}, {
162 		.mask	= PTE_ATTRINDX_MASK,
163 		.val	= PTE_ATTRINDX(MT_NORMAL),
164 		.set	= "MEM/NORMAL",
165 	}
166 };
167 
168 struct pg_level {
169 	const struct prot_bits *bits;
170 	const char *name;
171 	size_t num;
172 	u64 mask;
173 };
174 
175 static struct pg_level pg_level[] = {
176 	{
177 	}, { /* pgd */
178 		.name	= "PGD",
179 		.bits	= pte_bits,
180 		.num	= ARRAY_SIZE(pte_bits),
181 	}, { /* pud */
182 		.name	= (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
183 		.bits	= pte_bits,
184 		.num	= ARRAY_SIZE(pte_bits),
185 	}, { /* pmd */
186 		.name	= (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD",
187 		.bits	= pte_bits,
188 		.num	= ARRAY_SIZE(pte_bits),
189 	}, { /* pte */
190 		.name	= "PTE",
191 		.bits	= pte_bits,
192 		.num	= ARRAY_SIZE(pte_bits),
193 	},
194 };
195 
/*
 * Print the decoded form of st->current_prot.
 *
 * For each of the @num entries in @bits, the entry's "set" string is
 * printed when the masked value matches and its "clear" string otherwise.
 * Entries whose selected string is NULL produce no output.
 */
static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
			size_t num)
{
	const struct prot_bits *const last = bits + num;

	for (; bits < last; bits++) {
		const char *text;

		text = ((st->current_prot & bits->mask) == bits->val) ?
			bits->set : bits->clear;
		if (!text)
			continue;

		pt_dump_seq_printf(st->seq, " %s", text);
	}
}
213 
note_prot_uxn(struct pg_state * st,unsigned long addr)214 static void note_prot_uxn(struct pg_state *st, unsigned long addr)
215 {
216 	if (!st->check_wx)
217 		return;
218 
219 	if ((st->current_prot & PTE_UXN) == PTE_UXN)
220 		return;
221 
222 	WARN_ONCE(1, "arm64/mm: Found non-UXN mapping at address %p/%pS\n",
223 		  (void *)st->start_address, (void *)st->start_address);
224 
225 	st->uxn_pages += (addr - st->start_address) / PAGE_SIZE;
226 }
227 
note_prot_wx(struct pg_state * st,unsigned long addr)228 static void note_prot_wx(struct pg_state *st, unsigned long addr)
229 {
230 	if (!st->check_wx)
231 		return;
232 	if ((st->current_prot & PTE_RDONLY) == PTE_RDONLY)
233 		return;
234 	if ((st->current_prot & PTE_PXN) == PTE_PXN)
235 		return;
236 
237 	WARN_ONCE(1, "arm64/mm: Found insecure W+X mapping at address %p/%pS\n",
238 		  (void *)st->start_address, (void *)st->start_address);
239 
240 	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
241 }
242 
note_page(struct pg_state * st,unsigned long addr,unsigned level,u64 val)243 static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
244 				u64 val)
245 {
246 	static const char units[] = "KMGTPE";
247 	u64 prot = val & pg_level[level].mask;
248 
249 	if (!st->level) {
250 		st->level = level;
251 		st->current_prot = prot;
252 		st->start_address = addr;
253 		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
254 	} else if (prot != st->current_prot || level != st->level ||
255 		   addr >= st->marker[1].start_address) {
256 		const char *unit = units;
257 		unsigned long delta;
258 
259 		if (st->current_prot) {
260 			note_prot_uxn(st, addr);
261 			note_prot_wx(st, addr);
262 			pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx   ",
263 				   st->start_address, addr);
264 
265 			delta = (addr - st->start_address) >> 10;
266 			while (!(delta & 1023) && unit[1]) {
267 				delta >>= 10;
268 				unit++;
269 			}
270 			pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
271 				   pg_level[st->level].name);
272 			if (pg_level[st->level].bits)
273 				dump_prot(st, pg_level[st->level].bits,
274 					  pg_level[st->level].num);
275 			pt_dump_seq_puts(st->seq, "\n");
276 		}
277 
278 		if (addr >= st->marker[1].start_address) {
279 			st->marker++;
280 			pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
281 		}
282 
283 		st->start_address = addr;
284 		st->current_prot = prot;
285 		st->level = level;
286 	}
287 
288 	if (addr >= st->marker[1].start_address) {
289 		st->marker++;
290 		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
291 	}
292 
293 }
294 
/*
 * Report every PTE covering [start, end) to note_page() as a level 4 entry.
 * The loop body always runs at least once.
 */
static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start,
		     unsigned long end)
{
	pte_t *ptep = pte_offset_kernel(pmdp, start);
	unsigned long addr = start;

	do {
		note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
		ptep++;
		addr += PAGE_SIZE;
	} while (addr != end);
}
305 
/*
 * Walk the PMD entries covering [start, end). Empty and section entries
 * are reported to note_page() as level 3; any other entry is followed
 * down to its PTEs.
 */
static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start,
		     unsigned long end)
{
	pmd_t *pmdp = pmd_offset(pudp, start);
	unsigned long addr = start;
	unsigned long next;

	do {
		/* Take a single snapshot of the entry for all tests below. */
		pmd_t pmd = READ_ONCE(*pmdp);

		next = pmd_addr_end(addr, end);

		if (!pmd_none(pmd) && !pmd_sect(pmd)) {
			BUG_ON(pmd_bad(pmd));
			walk_pte(st, pmdp, addr, next);
		} else {
			note_page(st, addr, 3, pmd_val(pmd));
		}

		pmdp++;
		addr = next;
	} while (addr != end);
}
324 
/*
 * Walk the PUD entries covering [start, end). Empty and section entries
 * are reported to note_page() as level 2; any other entry is followed
 * down to its PMDs.
 */
static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start,
		     unsigned long end)
{
	pud_t *pudp = pud_offset(pgdp, start);
	unsigned long addr = start;
	unsigned long next;

	do {
		/* Take a single snapshot of the entry for all tests below. */
		pud_t pud = READ_ONCE(*pudp);

		next = pud_addr_end(addr, end);

		if (!pud_none(pud) && !pud_sect(pud)) {
			BUG_ON(pud_bad(pud));
			walk_pmd(st, pudp, addr, next);
		} else {
			note_page(st, addr, 2, pud_val(pud));
		}

		pudp++;
		addr = next;
	} while (addr != end);
}
343 
walk_pgd(struct pg_state * st,struct mm_struct * mm,unsigned long start)344 static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
345 		     unsigned long start)
346 {
347 	unsigned long end = (start < TASK_SIZE_64) ? TASK_SIZE_64 : 0;
348 	unsigned long next, addr = start;
349 	pgd_t *pgdp = pgd_offset(mm, start);
350 
351 	do {
352 		pgd_t pgd = READ_ONCE(*pgdp);
353 		next = pgd_addr_end(addr, end);
354 
355 		if (pgd_none(pgd)) {
356 			note_page(st, addr, 1, pgd_val(pgd));
357 		} else {
358 			BUG_ON(pgd_bad(pgd));
359 			walk_pud(st, pgdp, addr, next);
360 		}
361 	} while (pgdp++, addr = next, addr != end);
362 }
363 
ptdump_walk_pgd(struct seq_file * m,struct ptdump_info * info)364 void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info)
365 {
366 	struct pg_state st = {
367 		.seq = m,
368 		.marker = info->markers,
369 	};
370 
371 	walk_pgd(&st, info->mm, info->base_addr);
372 
373 	note_page(&st, 0, 0, 0);
374 }
375 
ptdump_initialize(void)376 static void ptdump_initialize(void)
377 {
378 	unsigned i, j;
379 
380 	for (i = 0; i < ARRAY_SIZE(pg_level); i++)
381 		if (pg_level[i].bits)
382 			for (j = 0; j < pg_level[i].num; j++)
383 				pg_level[i].mask |= pg_level[i].bits[j].mask;
384 }
385 
386 static struct ptdump_info kernel_ptdump_info = {
387 	.mm		= &init_mm,
388 	.markers	= address_markers,
389 	.base_addr	= PAGE_OFFSET,
390 };
391 
ptdump_check_wx(void)392 void ptdump_check_wx(void)
393 {
394 	struct pg_state st = {
395 		.seq = NULL,
396 		.marker = (struct addr_marker[]) {
397 			{ 0, NULL},
398 			{ -1, NULL},
399 		},
400 		.check_wx = true,
401 	};
402 
403 	walk_pgd(&st, &init_mm, PAGE_OFFSET);
404 	note_page(&st, 0, 0, 0);
405 	if (st.wx_pages || st.uxn_pages)
406 		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
407 			st.wx_pages, st.uxn_pages);
408 	else
409 		pr_info("Checked W+X mappings: passed, no W+X pages found\n");
410 }
411 
ptdump_init(void)412 static int ptdump_init(void)
413 {
414 	address_markers[PAGE_END_NR].start_address = PAGE_END;
415 #ifdef CONFIG_KASAN
416 	address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START;
417 #endif
418 	ptdump_initialize();
419 	ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
420 	return 0;
421 }
422 device_initcall(ptdump_init);
423