• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * page-types: Tool for querying page flags
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of the GNU General Public License as published by the Free
6  * Software Foundation; version 2.
7  *
8  * This program is distributed in the hope that it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should find a copy of v2 of the GNU General Public License somewhere on
14  * your Linux system; if not, write to the Free Software Foundation, Inc., 59
15  * Temple Place, Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2009 Intel corporation
18  *
19  * Authors: Wu Fengguang <fengguang.wu@intel.com>
20  */
21 
22 #define _LARGEFILE64_SOURCE
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <unistd.h>
26 #include <stdint.h>
27 #include <stdarg.h>
28 #include <string.h>
29 #include <getopt.h>
30 #include <limits.h>
31 #include <assert.h>
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/fcntl.h>
35 
36 
/*
 * pagemap kernel ABI bits
 *
 * Field layout of one 64-bit pagemap entry, as encoded by the masks below:
 * the top PM_STATUS_BITS bits hold the status, the PM_PSHIFT_BITS bits
 * beneath them hold the page shift, and all remaining low bits hold the
 * page frame number.
 *
 * Every mask is built from unsigned 64-bit operands.  The status field
 * reaches bit 63, and shifting a signed long long that far overflows.
 */

#define PM_ENTRY_BYTES      sizeof(uint64_t)
#define PM_STATUS_BITS      3
#define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
#define PM_STATUS_MASK      (((1ULL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
#define PM_STATUS(nr)       (((uint64_t)(nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
#define PM_PSHIFT_BITS      6
#define PM_PSHIFT_OFFSET    (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define PM_PSHIFT_MASK      (((1ULL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
#define PM_PSHIFT(x)        (((uint64_t)(x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define PM_PFRAME_MASK      ((1ULL << PM_PSHIFT_OFFSET) - 1)
#define PM_PFRAME(x)        ((x) & PM_PFRAME_MASK)

/* status values: 4 selects bit 63, 2 selects bit 62 */
#define PM_PRESENT          PM_STATUS(4LL)
#define PM_SWAP             PM_STATUS(2LL)
55 
56 
/*
 * kernel page flags
 */

#define KPF_BYTES		8
#define PROC_KPAGEFLAGS		"/proc/kpageflags"

/*
 * Bit positions of the individual page flags.  They are plain integer
 * constants, usable both as array indexes and, through BIT(), as shift
 * counts.
 */
enum {
	/* copied from kpageflags_read() */
	KPF_LOCKED		= 0,
	KPF_ERROR		= 1,
	KPF_REFERENCED		= 2,
	KPF_UPTODATE		= 3,
	KPF_DIRTY		= 4,
	KPF_LRU			= 5,
	KPF_ACTIVE		= 6,
	KPF_SLAB		= 7,
	KPF_WRITEBACK		= 8,
	KPF_RECLAIM		= 9,
	KPF_BUDDY		= 10,

	/* [11-20] new additions in 2.6.31; KPF_KSM (21) extends the group */
	KPF_MMAP		= 11,
	KPF_ANON		= 12,
	KPF_SWAPCACHE		= 13,
	KPF_SWAPBACKED		= 14,
	KPF_COMPOUND_HEAD	= 15,
	KPF_COMPOUND_TAIL	= 16,
	KPF_HUGE		= 17,
	KPF_UNEVICTABLE		= 18,
	KPF_HWPOISON		= 19,
	KPF_NOPAGE		= 20,
	KPF_KSM			= 21,

	/* [32-] kernel hacking assistances */
	KPF_RESERVED		= 32,
	KPF_MLOCKED		= 33,
	KPF_MAPPEDTODISK	= 34,
	KPF_PRIVATE		= 35,
	KPF_PRIVATE_2		= 36,
	KPF_OWNER_PRIVATE	= 37,
	KPF_ARCH		= 38,
	KPF_UNCACHED		= 39,

	/* [48-] take some arbitrary free slots for expanding overloaded
	 * flags; not part of kernel API
	 */
	KPF_READAHEAD		= 48,
	KPF_SLOB_FREE		= 49,
	KPF_SLUB_FROZEN		= 50,
	KPF_SLUB_DEBUG		= 51
};

#define KPF_ALL_BITS		((uint64_t)~0ULL)
#define KPF_HACKERS_BITS	(0xffffULL << 32)	/* bits 32-47 */
#define KPF_OVERLOADED_BITS	(0xffffULL << 48)	/* bits 48-63 */

/* BIT(SLAB) expands to the single-bit mask 1ULL << KPF_SLAB, and so on */
#define BIT(name)		(1ULL << KPF_##name)
#define BITS_COMPOUND		(BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL))
113 
114 static const char *page_flag_names[] = {
115 	[KPF_LOCKED]		= "L:locked",
116 	[KPF_ERROR]		= "E:error",
117 	[KPF_REFERENCED]	= "R:referenced",
118 	[KPF_UPTODATE]		= "U:uptodate",
119 	[KPF_DIRTY]		= "D:dirty",
120 	[KPF_LRU]		= "l:lru",
121 	[KPF_ACTIVE]		= "A:active",
122 	[KPF_SLAB]		= "S:slab",
123 	[KPF_WRITEBACK]		= "W:writeback",
124 	[KPF_RECLAIM]		= "I:reclaim",
125 	[KPF_BUDDY]		= "B:buddy",
126 
127 	[KPF_MMAP]		= "M:mmap",
128 	[KPF_ANON]		= "a:anonymous",
129 	[KPF_SWAPCACHE]		= "s:swapcache",
130 	[KPF_SWAPBACKED]	= "b:swapbacked",
131 	[KPF_COMPOUND_HEAD]	= "H:compound_head",
132 	[KPF_COMPOUND_TAIL]	= "T:compound_tail",
133 	[KPF_HUGE]		= "G:huge",
134 	[KPF_UNEVICTABLE]	= "u:unevictable",
135 	[KPF_HWPOISON]		= "X:hwpoison",
136 	[KPF_NOPAGE]		= "n:nopage",
137 	[KPF_KSM]		= "x:ksm",
138 
139 	[KPF_RESERVED]		= "r:reserved",
140 	[KPF_MLOCKED]		= "m:mlocked",
141 	[KPF_MAPPEDTODISK]	= "d:mappedtodisk",
142 	[KPF_PRIVATE]		= "P:private",
143 	[KPF_PRIVATE_2]		= "p:private_2",
144 	[KPF_OWNER_PRIVATE]	= "O:owner_private",
145 	[KPF_ARCH]		= "h:arch",
146 	[KPF_UNCACHED]		= "c:uncached",
147 
148 	[KPF_READAHEAD]		= "I:readahead",
149 	[KPF_SLOB_FREE]		= "P:slob_free",
150 	[KPF_SLUB_FROZEN]	= "A:slub_frozen",
151 	[KPF_SLUB_DEBUG]	= "E:slub_debug",
152 };
153 
154 
/*
 * data structures
 */

static int		opt_raw;	/* for kernel developers */
static int		opt_list;	/* list pages (in ranges) */
static int		opt_no_summary;	/* don't show summary */
static pid_t		opt_pid;	/* process to walk */

/* page ranges requested on the command line: start offset and length */
#define MAX_ADDR_RANGES	1024
static int		nr_addr_ranges;
static unsigned long	opt_offset[MAX_ADDR_RANGES];
static unsigned long	opt_size[MAX_ADDR_RANGES];

/* VMAs of the walked process, as [pg_start, pg_end) in units of pages */
#define MAX_VMAS	10240
static int		nr_vmas;
static unsigned long	pg_start[MAX_VMAS];
static unsigned long	pg_end[MAX_VMAS];

/* flag filters: a page is kept only if it satisfies every mask/bits pair */
#define MAX_BIT_FILTERS	64
static int		nr_bit_filters;
static uint64_t		opt_mask[MAX_BIT_FILTERS];
static uint64_t		opt_bits[MAX_BIT_FILTERS];

static int		page_size;

static int		pagemap_fd;
static int		kpageflags_fd;

static int		opt_hwpoison;
static int		opt_unpoison;

static const char	hwpoison_debug_fs[] = "/debug/hwpoison";
static int		hwpoison_inject_fd;
static int		hwpoison_forget_fd;

/*
 * Summary hash table, keyed by flags value: page_flags[] stores the flags
 * word owning a slot and nr_pages[] the number of pages counted for it.
 * HASH_KEY() parenthesizes its argument so that an expression argument is
 * masked as a whole.
 */
#define HASH_SHIFT	13
#define HASH_SIZE	(1 << HASH_SHIFT)
#define HASH_MASK	(HASH_SIZE - 1)
#define HASH_KEY(flags)	((flags) & HASH_MASK)

static unsigned long	total_pages;
static unsigned long	nr_pages[HASH_SIZE];
static uint64_t 	page_flags[HASH_SIZE];
199 
200 
201 /*
202  * helper functions
203  */
204 
/* number of elements in a true array (not valid for pointers) */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * min_t/max_t: compare x and y after converting both to 'type'.  Each
 * argument is evaluated exactly once.  These rely on the GNU statement
 * expression extension.  The temporaries avoid names starting with a
 * double underscore, which are reserved for the implementation.
 */
#define min_t(type, x, y) ({			\
	type min_lhs_ = (x);			\
	type min_rhs_ = (y);			\
	min_lhs_ < min_rhs_ ? min_lhs_ : min_rhs_; })

#define max_t(type, x, y) ({			\
	type max_lhs_ = (x);			\
	type max_rhs_ = (y);			\
	max_lhs_ > max_rhs_ ? max_lhs_ : max_rhs_; })
216 
pages2mb(unsigned long pages)217 static unsigned long pages2mb(unsigned long pages)
218 {
219 	return (pages * page_size) >> 20;
220 }
221 
/*
 * Print a printf-style message to stderr and terminate the program with
 * EXIT_FAILURE.  This function does not return.
 */
static void fatal(const char *x, ...)
{
	va_list args;

	va_start(args, x);
	vfprintf(stderr, x, args);
	va_end(args);

	exit(EXIT_FAILURE);
}
231 
/*
 * open() wrapper that never returns a failure: when the file cannot be
 * opened, the reason is reported through perror(pathname) and the program
 * exits with EXIT_FAILURE.  Returns the open file descriptor.
 */
static int checked_open(const char *pathname, int flags)
{
	int fd = open(pathname, flags);

	if (fd >= 0)
		return fd;

	perror(pathname);
	exit(EXIT_FAILURE);
}
243 
244 /*
245  * pagemap/kpageflags routines
246  */
247 
/*
 * Read up to 'count' 64-bit entries from 'fd', starting at entry number
 * 'index', into 'buf'.
 *
 * fd:    descriptor of a file made of 8-byte entries
 * name:  label used in perror() messages; never modified
 * buf:   destination with room for at least 'count' entries
 * index: entry number to seek to (byte offset index * 8)
 * count: maximum number of entries to read
 *
 * Returns the number of whole entries read.  This may be less than
 * 'count', and is 0 when read() returns no data.  A seek or read error, an
 * index whose byte offset would overflow, or a read that is not a multiple
 * of 8 bytes terminates the program.
 */
static unsigned long do_u64_read(int fd, const char *name,
				 uint64_t *buf,
				 unsigned long index,
				 unsigned long count)
{
	long bytes;

	if (index > ULONG_MAX / 8)
		fatal("index overflow: %lu\n", index);

	if (lseek(fd, index * 8, SEEK_SET) < 0) {
		perror(name);
		exit(EXIT_FAILURE);
	}

	bytes = read(fd, buf, count * 8);
	if (bytes < 0) {
		perror(name);
		exit(EXIT_FAILURE);
	}
	if (bytes % 8)
		fatal("partial read: %ld bytes\n", bytes);	/* bytes is signed */

	return bytes / 8;
}
273 
kpageflags_read(uint64_t * buf,unsigned long index,unsigned long pages)274 static unsigned long kpageflags_read(uint64_t *buf,
275 				     unsigned long index,
276 				     unsigned long pages)
277 {
278 	return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages);
279 }
280 
pagemap_read(uint64_t * buf,unsigned long index,unsigned long pages)281 static unsigned long pagemap_read(uint64_t *buf,
282 				  unsigned long index,
283 				  unsigned long pages)
284 {
285 	return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages);
286 }
287 
pagemap_pfn(uint64_t val)288 static unsigned long pagemap_pfn(uint64_t val)
289 {
290 	unsigned long pfn;
291 
292 	if (val & PM_PRESENT)
293 		pfn = PM_PFRAME(val);
294 	else
295 		pfn = 0;
296 
297 	return pfn;
298 }
299 
300 
301 /*
302  * page flag names
303  */
304 
page_flag_name(uint64_t flags)305 static char *page_flag_name(uint64_t flags)
306 {
307 	static char buf[65];
308 	int present;
309 	int i, j;
310 
311 	for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
312 		present = (flags >> i) & 1;
313 		if (!page_flag_names[i]) {
314 			if (present)
315 				fatal("unknown flag bit %d\n", i);
316 			continue;
317 		}
318 		buf[j++] = present ? page_flag_names[i][0] : '_';
319 	}
320 
321 	return buf;
322 }
323 
page_flag_longname(uint64_t flags)324 static char *page_flag_longname(uint64_t flags)
325 {
326 	static char buf[1024];
327 	int i, n;
328 
329 	for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) {
330 		if (!page_flag_names[i])
331 			continue;
332 		if ((flags >> i) & 1)
333 			n += snprintf(buf + n, sizeof(buf) - n, "%s,",
334 					page_flag_names[i] + 2);
335 	}
336 	if (n)
337 		n--;
338 	buf[n] = '\0';
339 
340 	return buf;
341 }
342 
343 
344 /*
345  * page list and summary
346  */
347 
show_page_range(unsigned long voffset,unsigned long offset,uint64_t flags)348 static void show_page_range(unsigned long voffset,
349 			    unsigned long offset, uint64_t flags)
350 {
351 	static uint64_t      flags0;
352 	static unsigned long voff;
353 	static unsigned long index;
354 	static unsigned long count;
355 
356 	if (flags == flags0 && offset == index + count &&
357 	    (!opt_pid || voffset == voff + count)) {
358 		count++;
359 		return;
360 	}
361 
362 	if (count) {
363 		if (opt_pid)
364 			printf("%lx\t", voff);
365 		printf("%lx\t%lx\t%s\n",
366 				index, count, page_flag_name(flags0));
367 	}
368 
369 	flags0 = flags;
370 	index  = offset;
371 	voff   = voffset;
372 	count  = 1;
373 }
374 
show_page(unsigned long voffset,unsigned long offset,uint64_t flags)375 static void show_page(unsigned long voffset,
376 		      unsigned long offset, uint64_t flags)
377 {
378 	if (opt_pid)
379 		printf("%lx\t", voffset);
380 	printf("%lx\t%s\n", offset, page_flag_name(flags));
381 }
382 
show_summary(void)383 static void show_summary(void)
384 {
385 	int i;
386 
387 	printf("             flags\tpage-count       MB"
388 		"  symbolic-flags\t\t\tlong-symbolic-flags\n");
389 
390 	for (i = 0; i < ARRAY_SIZE(nr_pages); i++) {
391 		if (nr_pages[i])
392 			printf("0x%016llx\t%10lu %8lu  %s\t%s\n",
393 				(unsigned long long)page_flags[i],
394 				nr_pages[i],
395 				pages2mb(nr_pages[i]),
396 				page_flag_name(page_flags[i]),
397 				page_flag_longname(page_flags[i]));
398 	}
399 
400 	printf("             total\t%10lu %8lu\n",
401 			total_pages, pages2mb(total_pages));
402 }
403 
404 
405 /*
406  * page flag filters
407  */
408 
bit_mask_ok(uint64_t flags)409 static int bit_mask_ok(uint64_t flags)
410 {
411 	int i;
412 
413 	for (i = 0; i < nr_bit_filters; i++) {
414 		if (opt_bits[i] == KPF_ALL_BITS) {
415 			if ((flags & opt_mask[i]) == 0)
416 				return 0;
417 		} else {
418 			if ((flags & opt_mask[i]) != opt_bits[i])
419 				return 0;
420 		}
421 	}
422 
423 	return 1;
424 }
425 
expand_overloaded_flags(uint64_t flags)426 static uint64_t expand_overloaded_flags(uint64_t flags)
427 {
428 	/* SLOB/SLUB overload several page flags */
429 	if (flags & BIT(SLAB)) {
430 		if (flags & BIT(PRIVATE))
431 			flags ^= BIT(PRIVATE) | BIT(SLOB_FREE);
432 		if (flags & BIT(ACTIVE))
433 			flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN);
434 		if (flags & BIT(ERROR))
435 			flags ^= BIT(ERROR) | BIT(SLUB_DEBUG);
436 	}
437 
438 	/* PG_reclaim is overloaded as PG_readahead in the read path */
439 	if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM))
440 		flags ^= BIT(RECLAIM) | BIT(READAHEAD);
441 
442 	return flags;
443 }
444 
well_known_flags(uint64_t flags)445 static uint64_t well_known_flags(uint64_t flags)
446 {
447 	/* hide flags intended only for kernel hacker */
448 	flags &= ~KPF_HACKERS_BITS;
449 
450 	/* hide non-hugeTLB compound pages */
451 	if ((flags & BITS_COMPOUND) && !(flags & BIT(HUGE)))
452 		flags &= ~BITS_COMPOUND;
453 
454 	return flags;
455 }
456 
kpageflags_flags(uint64_t flags)457 static uint64_t kpageflags_flags(uint64_t flags)
458 {
459 	flags = expand_overloaded_flags(flags);
460 
461 	if (!opt_raw)
462 		flags = well_known_flags(flags);
463 
464 	return flags;
465 }
466 
467 /*
468  * page actions
469  */
470 
prepare_hwpoison_fd(void)471 static void prepare_hwpoison_fd(void)
472 {
473 	char buf[100];
474 
475 	if (opt_hwpoison && !hwpoison_inject_fd) {
476 		sprintf(buf, "%s/corrupt-pfn", hwpoison_debug_fs);
477 		hwpoison_inject_fd = checked_open(buf, O_WRONLY);
478 	}
479 
480 	if (opt_unpoison && !hwpoison_forget_fd) {
481 		sprintf(buf, "%s/unpoison-pfn", hwpoison_debug_fs);
482 		hwpoison_forget_fd = checked_open(buf, O_WRONLY);
483 	}
484 }
485 
hwpoison_page(unsigned long offset)486 static int hwpoison_page(unsigned long offset)
487 {
488 	char buf[100];
489 	int len;
490 
491 	len = sprintf(buf, "0x%lx\n", offset);
492 	len = write(hwpoison_inject_fd, buf, len);
493 	if (len < 0) {
494 		perror("hwpoison inject");
495 		return len;
496 	}
497 	return 0;
498 }
499 
unpoison_page(unsigned long offset)500 static int unpoison_page(unsigned long offset)
501 {
502 	char buf[100];
503 	int len;
504 
505 	len = sprintf(buf, "0x%lx\n", offset);
506 	len = write(hwpoison_forget_fd, buf, len);
507 	if (len < 0) {
508 		perror("hwpoison forget");
509 		return len;
510 	}
511 	return 0;
512 }
513 
514 /*
515  * page frame walker
516  */
517 
hash_slot(uint64_t flags)518 static int hash_slot(uint64_t flags)
519 {
520 	int k = HASH_KEY(flags);
521 	int i;
522 
523 	/* Explicitly reserve slot 0 for flags 0: the following logic
524 	 * cannot distinguish an unoccupied slot from slot (flags==0).
525 	 */
526 	if (flags == 0)
527 		return 0;
528 
529 	/* search through the remaining (HASH_SIZE-1) slots */
530 	for (i = 1; i < ARRAY_SIZE(page_flags); i++, k++) {
531 		if (!k || k >= ARRAY_SIZE(page_flags))
532 			k = 1;
533 		if (page_flags[k] == 0) {
534 			page_flags[k] = flags;
535 			return k;
536 		}
537 		if (page_flags[k] == flags)
538 			return k;
539 	}
540 
541 	fatal("hash table full: bump up HASH_SHIFT?\n");
542 	exit(EXIT_FAILURE);
543 }
544 
add_page(unsigned long voffset,unsigned long offset,uint64_t flags)545 static void add_page(unsigned long voffset,
546 		     unsigned long offset, uint64_t flags)
547 {
548 	flags = kpageflags_flags(flags);
549 
550 	if (!bit_mask_ok(flags))
551 		return;
552 
553 	if (opt_hwpoison)
554 		hwpoison_page(offset);
555 	if (opt_unpoison)
556 		unpoison_page(offset);
557 
558 	if (opt_list == 1)
559 		show_page_range(voffset, offset, flags);
560 	else if (opt_list == 2)
561 		show_page(voffset, offset, flags);
562 
563 	nr_pages[hash_slot(flags)]++;
564 	total_pages++;
565 }
566 
#define KPAGEFLAGS_BATCH	(64 << 10)	/* 64k pages */
/*
 * Read the flags of 'count' page frames starting at frame 'index' and pass
 * each one to add_page().  The i-th page is reported with virtual offset
 * voffset + i.  Frames are fetched in batches of at most KPAGEFLAGS_BATCH
 * entries, and the walk ends early once a read returns no entries.
 */
static void walk_pfn(unsigned long voffset,
		     unsigned long index,
		     unsigned long count)
{
	uint64_t buf[KPAGEFLAGS_BATCH];
	unsigned long want;
	unsigned long got;
	unsigned long i;

	while (count) {
		want = count < KPAGEFLAGS_BATCH ? count : KPAGEFLAGS_BATCH;
		got = kpageflags_read(buf, index, want);
		if (!got)
			break;

		for (i = 0; i < got; i++)
			add_page(voffset + i, index + i, buf[i]);

		index += got;
		count -= got;
	}
}
590 
#define PAGEMAP_BATCH	(64 << 10)
/*
 * Walk 'count' virtual pages of the process starting at virtual page
 * 'index'.  Pagemap entries are read in batches of at most PAGEMAP_BATCH.
 * Every entry that yields a non-zero page frame number is passed on to
 * walk_pfn() as a single page.  The walk ends early once a read returns no
 * entries.
 */
static void walk_vma(unsigned long index, unsigned long count)
{
	uint64_t buf[PAGEMAP_BATCH];
	unsigned long want;
	unsigned long got;
	unsigned long i;

	while (count) {
		want = count < PAGEMAP_BATCH ? count : PAGEMAP_BATCH;
		got = pagemap_read(buf, index, want);
		if (!got)
			break;

		for (i = 0; i < got; i++) {
			unsigned long pfn = pagemap_pfn(buf[i]);

			if (!pfn)
				continue;
			walk_pfn(index + i, pfn, 1);
		}

		index += got;
		count -= got;
	}
}
616 
walk_task(unsigned long index,unsigned long count)617 static void walk_task(unsigned long index, unsigned long count)
618 {
619 	const unsigned long end = index + count;
620 	unsigned long start;
621 	int i = 0;
622 
623 	while (index < end) {
624 
625 		while (pg_end[i] <= index)
626 			if (++i >= nr_vmas)
627 				return;
628 		if (pg_start[i] >= end)
629 			return;
630 
631 		start = max_t(unsigned long, pg_start[i], index);
632 		index = min_t(unsigned long, pg_end[i], end);
633 
634 		assert(start < index);
635 		walk_vma(start, index - start);
636 	}
637 }
638 
add_addr_range(unsigned long offset,unsigned long size)639 static void add_addr_range(unsigned long offset, unsigned long size)
640 {
641 	if (nr_addr_ranges >= MAX_ADDR_RANGES)
642 		fatal("too many addr ranges\n");
643 
644 	opt_offset[nr_addr_ranges] = offset;
645 	opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset);
646 	nr_addr_ranges++;
647 }
648 
walk_addr_ranges(void)649 static void walk_addr_ranges(void)
650 {
651 	int i;
652 
653 	kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
654 
655 	if (!nr_addr_ranges)
656 		add_addr_range(0, ULONG_MAX);
657 
658 	for (i = 0; i < nr_addr_ranges; i++)
659 		if (!opt_pid)
660 			walk_pfn(0, opt_offset[i], opt_size[i]);
661 		else
662 			walk_task(opt_offset[i], opt_size[i]);
663 
664 	close(kpageflags_fd);
665 }
666 
667 
668 /*
669  * user interface
670  */
671 
page_flag_type(uint64_t flag)672 static const char *page_flag_type(uint64_t flag)
673 {
674 	if (flag & KPF_HACKERS_BITS)
675 		return "(r)";
676 	if (flag & KPF_OVERLOADED_BITS)
677 		return "(o)";
678 	return "   ";
679 }
680 
usage(void)681 static void usage(void)
682 {
683 	int i, j;
684 
685 	printf(
686 "page-types [options]\n"
687 "            -r|--raw                   Raw mode, for kernel developers\n"
688 "            -d|--describe flags        Describe flags\n"
689 "            -a|--addr    addr-spec     Walk a range of pages\n"
690 "            -b|--bits    bits-spec     Walk pages with specified bits\n"
691 "            -p|--pid     pid           Walk process address space\n"
692 #if 0 /* planned features */
693 "            -f|--file    filename      Walk file address space\n"
694 #endif
695 "            -l|--list                  Show page details in ranges\n"
696 "            -L|--list-each             Show page details one by one\n"
697 "            -N|--no-summary            Don't show summary info\n"
698 "            -X|--hwpoison              hwpoison pages\n"
699 "            -x|--unpoison              unpoison pages\n"
700 "            -h|--help                  Show this usage message\n"
701 "flags:\n"
702 "            0x10                       bitfield format, e.g.\n"
703 "            anon                       bit-name, e.g.\n"
704 "            0x10,anon                  comma-separated list, e.g.\n"
705 "addr-spec:\n"
706 "            N                          one page at offset N (unit: pages)\n"
707 "            N+M                        pages range from N to N+M-1\n"
708 "            N,M                        pages range from N to M-1\n"
709 "            N,                         pages range from N to end\n"
710 "            ,M                         pages range from 0 to M-1\n"
711 "bits-spec:\n"
712 "            bit1,bit2                  (flags & (bit1|bit2)) != 0\n"
713 "            bit1,bit2=bit1             (flags & (bit1|bit2)) == bit1\n"
714 "            bit1,~bit2                 (flags & (bit1|bit2)) == bit1\n"
715 "            =bit1,bit2                 flags == (bit1|bit2)\n"
716 "bit-names:\n"
717 	);
718 
719 	for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
720 		if (!page_flag_names[i])
721 			continue;
722 		printf("%16s%s", page_flag_names[i] + 2,
723 				 page_flag_type(1ULL << i));
724 		if (++j > 3) {
725 			j = 0;
726 			putchar('\n');
727 		}
728 	}
729 	printf("\n                                   "
730 		"(r) raw mode bits  (o) overloaded bits\n");
731 }
732 
/*
 * Parse the unsigned number at the start of 'str'.  The base is detected
 * automatically, so decimal, 0x-prefixed hex and 0-prefixed octal all
 * work.  Characters after the number are ignored, because callers pass
 * pointers into longer comma-separated lists.
 *
 * The conversion uses strtoull() so that the full unsigned 64-bit range,
 * including masks with bit 63 set, is accepted.  The program terminates
 * when 'str' does not start with a number or when the value is out of
 * range.
 */
static unsigned long long parse_number(const char *str)
{
	unsigned long long n;
	char *end;

	errno = 0;
	n = strtoull(str, &end, 0);

	/* nothing consumed: not a number at all */
	if (end == str)
		fatal("invalid name or number: %s\n", str);
	if (errno == ERANGE)
		fatal("number out of range: %s\n", str);

	return n;
}
744 
parse_pid(const char * str)745 static void parse_pid(const char *str)
746 {
747 	FILE *file;
748 	char buf[5000];
749 
750 	opt_pid = parse_number(str);
751 
752 	sprintf(buf, "/proc/%d/pagemap", opt_pid);
753 	pagemap_fd = checked_open(buf, O_RDONLY);
754 
755 	sprintf(buf, "/proc/%d/maps", opt_pid);
756 	file = fopen(buf, "r");
757 	if (!file) {
758 		perror(buf);
759 		exit(EXIT_FAILURE);
760 	}
761 
762 	while (fgets(buf, sizeof(buf), file) != NULL) {
763 		unsigned long vm_start;
764 		unsigned long vm_end;
765 		unsigned long long pgoff;
766 		int major, minor;
767 		char r, w, x, s;
768 		unsigned long ino;
769 		int n;
770 
771 		n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu",
772 			   &vm_start,
773 			   &vm_end,
774 			   &r, &w, &x, &s,
775 			   &pgoff,
776 			   &major, &minor,
777 			   &ino);
778 		if (n < 10) {
779 			fprintf(stderr, "unexpected line: %s\n", buf);
780 			continue;
781 		}
782 		pg_start[nr_vmas] = vm_start / page_size;
783 		pg_end[nr_vmas] = vm_end / page_size;
784 		if (++nr_vmas >= MAX_VMAS) {
785 			fprintf(stderr, "too many VMAs\n");
786 			break;
787 		}
788 	}
789 	fclose(file);
790 }
791 
/*
 * Handler for the file option.  Not implemented: the argument is accepted
 * and ignored.
 */
static void parse_file(const char *name)
{
	(void)name;
}
795 
/*
 * Parse one addr-spec and add the resulting page range:
 *
 *   N     one page at offset N
 *   N+M   M pages starting at N
 *   N,M   pages N up to but excluding M (M < N is fatal)
 *   N,    from N to the end (also "N+")
 *   ,M    M pages starting at 0 (also "+M")
 *
 * A ',' anywhere in the string takes precedence over a '+'.
 */
static void parse_addr_range(const char *optarg)
{
	unsigned long offset;
	unsigned long size;
	const char *sep;

	sep = strchr(optarg, ',');
	if (!sep)
		sep = strchr(optarg, '+');

	if (!sep) {
		/* plain number: a single page */
		offset = parse_number(optarg);
		size   = 1;
	} else if (sep == optarg) {
		/* separator first: the range starts at 0 */
		offset = 0;
		size   = parse_number(sep + 1);
	} else {
		offset = parse_number(optarg);
		if (sep[1] == '\0') {
			size = ULONG_MAX;
		} else {
			size = parse_number(sep + 1);
			if (*sep == ',') {
				/* "N,M" gives an end, not a length */
				if (size < offset)
					fatal("invalid range: %lu,%lu\n",
							offset, size);
				size -= offset;
			}
		}
	}

	add_addr_range(offset, size);
}
829 
add_bits_filter(uint64_t mask,uint64_t bits)830 static void add_bits_filter(uint64_t mask, uint64_t bits)
831 {
832 	if (nr_bit_filters >= MAX_BIT_FILTERS)
833 		fatal("too much bit filters\n");
834 
835 	opt_mask[nr_bit_filters] = mask;
836 	opt_bits[nr_bit_filters] = bits;
837 	nr_bit_filters++;
838 }
839 
parse_flag_name(const char * str,int len)840 static uint64_t parse_flag_name(const char *str, int len)
841 {
842 	int i;
843 
844 	if (!*str || !len)
845 		return 0;
846 
847 	if (len <= 8 && !strncmp(str, "compound", len))
848 		return BITS_COMPOUND;
849 
850 	for (i = 0; i < ARRAY_SIZE(page_flag_names); i++) {
851 		if (!page_flag_names[i])
852 			continue;
853 		if (!strncmp(str, page_flag_names[i] + 2, len))
854 			return 1ULL << i;
855 	}
856 
857 	return parse_number(str);
858 }
859 
/*
 * Parse a comma-separated list of flag tokens and return the OR of their
 * masks.  Parsing stops at the end of the string or at the first '='.
 *
 * A token starting with '~' is skipped when 'all' is zero.  When 'all' is
 * non-zero the '~' is stripped and the rest of the token is included.
 */
static uint64_t parse_flag_names(const char *str, int all)
{
	const char *p;
	uint64_t flags = 0;

	for (p = str; ; p++) {
		if (*p != ',' && *p != '=' && *p != '\0')
			continue;

		/* the current token spans [str, p) */
		if (*str != '~')
			flags |= parse_flag_name(str, p - str);
		else if (all && *++str)
			flags |= parse_flag_name(str, p - str);

		if (*p != ',')
			break;
		str = p + 1;
	}

	return flags;
}
878 
parse_bits_mask(const char * optarg)879 static void parse_bits_mask(const char *optarg)
880 {
881 	uint64_t mask;
882 	uint64_t bits;
883 	const char *p;
884 
885 	p = strchr(optarg, '=');
886 	if (p == optarg) {
887 		mask = KPF_ALL_BITS;
888 		bits = parse_flag_names(p + 1, 0);
889 	} else if (p) {
890 		mask = parse_flag_names(optarg, 0);
891 		bits = parse_flag_names(p + 1, 0);
892 	} else if (strchr(optarg, '~')) {
893 		mask = parse_flag_names(optarg, 1);
894 		bits = parse_flag_names(optarg, 0);
895 	} else {
896 		mask = parse_flag_names(optarg, 0);
897 		bits = KPF_ALL_BITS;
898 	}
899 
900 	add_bits_filter(mask, bits);
901 }
902 
/*
 * Parse the flag list in 'optarg' and print it on one line in three forms:
 * the 64-bit hex value, the compact symbol string and the long names.
 */
static void describe_flags(const char *optarg)
{
	const uint64_t flags = parse_flag_names(optarg, 0);
	const unsigned long long value = flags;

	printf("0x%016llx\t%s\t%s\n",
	       value,
	       page_flag_name(flags),
	       page_flag_longname(flags));
}
912 
/*
 * Long options.  Each one maps to the short option character stored in
 * .val.  The array ends with an all-zero entry.
 */
static const struct option opts[] = {
	{ .name = "raw",        .has_arg = no_argument,       .flag = NULL, .val = 'r' },
	{ .name = "pid",        .has_arg = required_argument, .flag = NULL, .val = 'p' },
	{ .name = "file",       .has_arg = required_argument, .flag = NULL, .val = 'f' },
	{ .name = "addr",       .has_arg = required_argument, .flag = NULL, .val = 'a' },
	{ .name = "bits",       .has_arg = required_argument, .flag = NULL, .val = 'b' },
	{ .name = "describe",   .has_arg = required_argument, .flag = NULL, .val = 'd' },
	{ .name = "list",       .has_arg = no_argument,       .flag = NULL, .val = 'l' },
	{ .name = "list-each",  .has_arg = no_argument,       .flag = NULL, .val = 'L' },
	{ .name = "no-summary", .has_arg = no_argument,       .flag = NULL, .val = 'N' },
	{ .name = "hwpoison",   .has_arg = no_argument,       .flag = NULL, .val = 'X' },
	{ .name = "unpoison",   .has_arg = no_argument,       .flag = NULL, .val = 'x' },
	{ .name = "help",       .has_arg = no_argument,       .flag = NULL, .val = 'h' },
	{ .name = NULL,         .has_arg = 0,                 .flag = NULL, .val = 0 }
};
928 
main(int argc,char * argv[])929 int main(int argc, char *argv[])
930 {
931 	int c;
932 
933 	page_size = getpagesize();
934 
935 	while ((c = getopt_long(argc, argv,
936 				"rp:f:a:b:d:lLNXxh", opts, NULL)) != -1) {
937 		switch (c) {
938 		case 'r':
939 			opt_raw = 1;
940 			break;
941 		case 'p':
942 			parse_pid(optarg);
943 			break;
944 		case 'f':
945 			parse_file(optarg);
946 			break;
947 		case 'a':
948 			parse_addr_range(optarg);
949 			break;
950 		case 'b':
951 			parse_bits_mask(optarg);
952 			break;
953 		case 'd':
954 			describe_flags(optarg);
955 			exit(0);
956 		case 'l':
957 			opt_list = 1;
958 			break;
959 		case 'L':
960 			opt_list = 2;
961 			break;
962 		case 'N':
963 			opt_no_summary = 1;
964 			break;
965 		case 'X':
966 			opt_hwpoison = 1;
967 			prepare_hwpoison_fd();
968 			break;
969 		case 'x':
970 			opt_unpoison = 1;
971 			prepare_hwpoison_fd();
972 			break;
973 		case 'h':
974 			usage();
975 			exit(0);
976 		default:
977 			usage();
978 			exit(1);
979 		}
980 	}
981 
982 	if (opt_list && opt_pid)
983 		printf("voffset\t");
984 	if (opt_list == 1)
985 		printf("offset\tlen\tflags\n");
986 	if (opt_list == 2)
987 		printf("offset\tflags\n");
988 
989 	walk_addr_ranges();
990 
991 	if (opt_list == 1)
992 		show_page_range(0, 0, 0);  /* drain the buffer */
993 
994 	if (opt_no_summary)
995 		return 0;
996 
997 	if (opt_list)
998 		printf("\n\n");
999 
1000 	show_summary();
1001 
1002 	return 0;
1003 }
1004