• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Test program for Linux poison memory error recovery.
3  * This injects poison into various mapping cases and triggers the poison
4  * handling.  Requires special injection support in the kernel.
5  *
6  * Copyright 2009, 2010 Intel Corporation
7  *
8  * tinjpage is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public
10  * License as published by the Free Software Foundation; version
11  * 2.
12  *
13  * tinjpage is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * You should find a copy of v2 of the GNU General Public License somewhere
19  * on your Linux system; if not, write to the Free Software Foundation,
20  * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21  *
22  * Authors: Andi Kleen, Fengguang Wu
23  */
24 #define _GNU_SOURCE 1
25 #include <stdio.h>
26 #include <signal.h>
27 #include <unistd.h>
28 #include <sys/fcntl.h>
29 #include <sys/wait.h>
30 #include <sys/mman.h>
31 #include <stdlib.h>
32 #include <setjmp.h>
33 #include <errno.h>
34 #include <string.h>
35 #include <time.h>
36 #include <pthread.h>
37 #include <sys/ipc.h>
38 #include <sys/shm.h>
39 #include <sys/sem.h>
40 #include "utils.h"
41 #include "hugepage.h"
42 
/* madvise() advice value for hwpoison injection (MADV_HWPOISON). */
#define MADV_POISON 100

/* Directory for scratch files and max path length used for their names. */
#define TMPDIR "./"
#define PATHBUFLEN 100

/* perror() that also counts the event as a test failure. */
#define Perror(x) failure++, perror(x)
/* String literal plus its length (without the NUL), for write(2). */
#define PAIR(x) x, sizeof(x)-1
/* Compiler barrier: prevent reordering of memory accesses across it. */
#define mb() asm volatile("" ::: "memory")
#if defined(__i386__) || defined(__x86_64__)
/* "rep; nop" == PAUSE: polite spin-wait hint on x86. */
#define cpu_relax() asm volatile("rep ; nop" ::: "memory")
#else
#define cpu_relax() mb()
#endif
56 
typedef unsigned long long u64;

int PS;			/* page size in bytes (set in main) */
int failure;		/* count of hard test failures */
int unexpected;		/* count of "probably wrong but tolerated" outcomes */
int early_kill;		/* 1 when vm.memory_failure_early_kill is enabled */
int test_hugepage;	/* 1 while a hugepage test case is running */
64 
/* mmap() wrapper that treats failure as fatal (exits via err()). */
void *checked_mmap(void *start, size_t length, int prot, int flags,
                   int fd, off_t offset)
{
	void *res;

	res = mmap(start, length, prot, flags, fd, offset);
	if (res == (void *)-1L)
		err("mmap");
	return res;
}
73 
munmap_reserve(void * page,int size)74 void munmap_reserve(void *page, int size)
75 {
76 	if (munmap(page, size) < 0)
77 		err("munmap");
78 	if (mmap(page, size, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 0, 0) < 0)
79 		err("mmap2");
80 }
81 
/* malloc() wrapper that terminates the program with status ENOMEM on failure. */
void *xmalloc(size_t s)
{
	void *ptr = malloc(s);

	if (ptr == NULL)
		exit(ENOMEM);
	return ptr;
}
89 
/* Ceiling of log2(n) for n >= 1: e.g. 1 -> 0, 2 -> 1, 4096 -> 12. */
static int ilog2(int n)
{
	int bits;

	for (bits = 0, n -= 1; n != 0; n >>= 1)
		bits++;
	return bits;
}
100 
/* Remaining siglongjmp budget in the SIGBUS handler before bailing out. */
int recovercount;
/* Jump target for recovery at page-fault (access) time. */
sigjmp_buf recover_ctx;
/* Jump target for recovery at injection (early-kill) time. */
sigjmp_buf early_recover_ctx;
/* Address the next SIGBUS is expected to report in si_addr. */
void *expected_addr;
105 
106 /* Work around glibc not defining this yet */
/* Work around glibc not defining this yet */
/* NOTE(review): layout is assumed to mirror the kernel siginfo up to
   _addr_lsb; verify against the running kernel's <asm-generic/siginfo.h>. */
struct my_siginfo {
	int si_signo;
	int si_errno;
	int si_code;
	union {
	struct {
		void  *_addr; /* faulting insn/memory ref. */
#ifdef __ARCH_SI_TRAPNO
		int _trapno;	/* TRAP # which caused the signal */
#endif
		short _addr_lsb; /* LSB of the reported address */
	} _sigfault;
	} _sifields;
};
#undef si_addr_lsb
#define si_addr_lsb _sifields._sigfault._addr_lsb
123 
/*
 * SIGBUS handler: checks that the fault address and the reported address
 * granularity (page vs. huge page) match expectations, then jumps back
 * into the test via siglongjmp.
 */
void sighandler(int sig, siginfo_t *si, void *arg)
{
	if (si->si_addr != expected_addr) {
		printf("XXX: Unexpected address in signal %p (expected %p)\n", si->si_addr,
			expected_addr);
		failure++;
	}

	/* glibc may lack si_addr_lsb; read it through the shadow struct */
	int lsb = ((struct my_siginfo *)si)->si_addr_lsb;
	if (test_hugepage) {
		if (lsb != ilog2(HPS)) {
			printf("LATER: Unexpected addr lsb in siginfo %d\n", lsb);
		}
	} else {
		if (lsb != ilog2(sysconf(_SC_PAGE_SIZE))) {
			printf("LATER: Unexpected addr lsb in siginfo %d\n", lsb);
		}
	}

	printf("\tsignal %d code %d addr %p\n", sig, si->si_code, si->si_addr);

	/* guard against an endless SIGBUS -> longjmp -> SIGBUS loop */
	if (--recovercount == 0) {
		write(1, PAIR("I seem to be in a signal loop. bailing out.\n"));
		exit(1);
	}

	/* si_code 4 == BUS_MCEERR_AR (raised at access time); anything
	   else is treated as an early (injection-time) kill */
	if (si->si_code == 4)
		siglongjmp(recover_ctx, 1);
	else
		siglongjmp(early_recover_ctx, 1);
}
155 
/* How to touch a poisoned page, and whether the access must survive. */
enum rmode {
	MREAD = 0,	/* read; SIGBUS expected */
	MWRITE = 1,	/* write; SIGBUS expected */
	MREAD_OK = 2,	/* read; must succeed */
	MWRITE_OK = 3,	/* write; must succeed */
	MNOTHING = -1,	/* do not touch the page at all */
};
163 
inject_madvise(char * page)164 void inject_madvise(char *page)
165 {
166 	if (madvise(page, PS, MADV_POISON) != 0) {
167 		if (errno == EINVAL) {
168 			printf("Kernel doesn't support poison injection\n");
169 			exit(0);
170 		}
171 		Perror("madvise");
172 	}
173 }
174 
page_to_pfn(char * page)175 u64 page_to_pfn(char *page)
176 {
177 	static int pagemap_fd = -1;
178 	u64 pfn;
179 
180 	if (pagemap_fd < 0)  {
181 		pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
182 		if (pagemap_fd < 0)
183 			err("/proc/self/pagemap not supported");
184 	}
185 
186 	if (pread(pagemap_fd, &pfn, sizeof(u64),
187 		((u64)page / PS)*sizeof(u64)) != sizeof(u64))
188 		err("Cannot read from pagemap");
189 
190 	pfn &= (1ULL<<56)-1;
191 	return pfn;
192 }
193 
194 /*
195  * Inject Action Optional #MC
196  * with mce-inject using the software injector.
197  *
198  * This tests the low level machine check handler too.
199  *
200  * Slightly racy with page migration because we don't mlock the page.
201  */
inject_mce_inject(char * page)202 void inject_mce_inject(char *page)
203 {
204 	u64 pfn = page_to_pfn(page);
205 	FILE *mce_inject;
206 
207 	mce_inject = popen("mce-inject", "w");
208 	if (!mce_inject) {
209 		fprintf(stderr, "Cannot open pipe to mce-inject: %s\n",
210 				strerror(errno));
211 		exit(1);
212 	}
213 
214 	fprintf(mce_inject,
215 		"CPU 0 BANK 3 STATUS UNCORRECTED SRAO 0xc0\n"
216 		"MCGSTATUS RIPV MCIP\n"
217 		"ADDR %#llx\n"
218 		"MISC 0x8c\n"
219 		"RIP 0x73:0x1eadbabe\n", pfn);
220 
221 	if (ferror(mce_inject) || fclose(mce_inject) < 0) {
222 		fprintf(stderr, "mce-inject failed: %s\n", strerror(errno));
223 		exit(1);
224 	}
225 }
226 
/* Active injection method; switched to inject_mce_inject by --mce-inject. */
void (*inject)(char *page) = inject_madvise;
228 
/*
 * Inject poison into page and check the early-kill expectations:
 * with early kill enabled, MREAD/MWRITE tests must already receive
 * SIGBUS at injection time (arriving here via siglongjmp), while the
 * *_OK modes must survive injection.
 */
void poison(char *msg, char *page, enum rmode mode)
{
	expected_addr = page;
	recovercount = 5;

	if (sigsetjmp(early_recover_ctx, 1) == 0) {
		inject(page);

		/* no signal arrived during injection */
		if (early_kill && (mode == MWRITE || mode == MREAD)) {
			printf("XXX: %s: process is not early killed\n", msg);
			failure++;
		}

		return;
	}

	/* reached via siglongjmp from the SIGBUS handler */
	if (early_kill) {
		if (mode == MREAD_OK || mode == MWRITE_OK) {
			printf("XXX: %s: killed\n", msg);
			failure++;
		} else
			printf("\trecovered\n");
	}
}
253 
/*
 * Touch the (previously poisoned) page according to mode and verify
 * the outcome: MREAD/MWRITE must fault (SIGBUS -> siglongjmp), the
 * *_OK variants must succeed, and MNOTHING skips the access entirely.
 */
void recover(char *msg, char *page, enum rmode mode)
{
	expected_addr = page;
	recovercount = 5;

	if (sigsetjmp(recover_ctx, 1) == 0) {
		switch (mode) {
		case MWRITE:
			printf("\twriting 2\n");
			*page = 2;
			break;
		case MWRITE_OK:
			printf("\twriting 4\n");
			*page = 4;
			return;
		case MREAD:
			printf("\treading %x\n", *(unsigned char *)page);
			break;
		case MREAD_OK:
			printf("\treading %x\n", *(unsigned char *)page);
			return;
		case MNOTHING:
			return;
		}
		/* signal or kill should have happened */
		printf("XXX: %s: page not poisoned after injection\n", msg);
		failure++;
		return;
	}
	/* siglongjmp from the SIGBUS handler lands here */
	if (mode == MREAD_OK || mode == MWRITE_OK) {
		printf("XXX: %s: killed\n", msg);
		failure++;
	} else
		printf("\trecovered\n");
}
289 
/* Full test cycle: poison the page, then access it according to mode. */
void testmem(char *msg, char *page, enum rmode mode)
{
	printf("\t%s poisoning page %p\n", msg, page);

	poison(msg, page, mode);
	recover(msg, page, mode);
}
296 
expecterr(char * msg,int err)297 void expecterr(char *msg, int err)
298 {
299 	if (err) {
300 		printf("\texpected error %d on %s\n", errno, msg);
301 	} else {
302 		failure++;
303 		printf("XXX: unexpected no error on %s\n", msg);
304 	}
305 }
306 
307 /*
308  * Any optional error is really a deficiency in the kernel VFS error reporting
309  * and should be eventually fixed and turned into a expecterr
310  */
optionalerr(char * msg,int err)311 void optionalerr(char *msg, int err)
312 {
313 	if (err) {
314 		printf("\texpected optional error %d on %s\n", errno, msg);
315 	} else {
316 		unexpected++;
317 		printf("LATER: expected likely incorrect no error on %s\n", msg);
318 	}
319 }
320 
321 static int tmpcount;
tempfd(void)322 int tempfd(void)
323 {
324 	int fd;
325 	char buf[PATHBUFLEN];
326 	snprintf(buf, sizeof buf, TMPDIR "~poison%d",tmpcount++);
327 	fd = open(buf, O_CREAT|O_RDWR, 0600);
328 	if (fd >= 0)
329 		unlink(buf);
330 	if (fd < 0)
331 		err("opening temporary file in " TMPDIR);
332 	return fd;
333 }
334 
playfile(char * buf)335 int playfile(char *buf)
336 {
337 	int fd;
338 	if (buf[0] == 0)
339 		snprintf(buf, PATHBUFLEN, TMPDIR "~poison%d", tmpcount++);
340 	fd = open(buf, O_CREAT|O_RDWR|O_TRUNC, 0600);
341 	if (fd < 0)
342 		err("opening temporary file in " TMPDIR);
343 
344 	const int NPAGES = 5;
345 	char *tmp = xmalloc(PS * NPAGES);
346 	int i;
347 	for (i = 0; i < PS*NPAGES; i++)
348 		tmp[i] = i;
349 	write(fd, tmp, PS*NPAGES);
350 
351 	lseek(fd, 0, SEEK_SET);
352 	return fd;
353 }
354 
dirty_anonymous(void)355 static void dirty_anonymous(void)
356 {
357 	char *page;
358 	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_POPULATE, 0, 0);
359 	testmem("dirty", page, MWRITE);
360 }
361 
dirty_anonymous_unmap(void)362 static void dirty_anonymous_unmap(void)
363 {
364 	char *page;
365 	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_POPULATE, 0, 0);
366 	testmem("dirty", page, MWRITE);
367 	munmap_reserve(page, PS);
368 }
369 
mlocked_anonymous(void)370 static void mlocked_anonymous(void)
371 {
372 	char *page;
373 	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_LOCKED, 0, 0);
374 	testmem("mlocked", page, MWRITE);
375 }
376 
do_file_clean(int flags,char * name)377 static void do_file_clean(int flags, char *name)
378 {
379 	char *page;
380 	char fn[30];
381 	snprintf(fn, 30, TMPDIR "~test%d", tmpcount++);
382 	int fd = open(fn, O_RDWR|O_TRUNC|O_CREAT);
383 	if (fd < 0)
384 		err("open temp file");
385 	write(fd, fn, 4);
386 	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_SHARED|flags,
387 		fd, 0);
388 	fsync(fd);
389 	close(fd);
390 	testmem(name, page, MREAD_OK);
391 	 /* reread page from disk */
392 	printf("\t reading %x\n", *(unsigned char *)page);
393 	testmem(name, page, MWRITE_OK);
394 }
395 
/* Clean file page, default mapping flags. */
static void file_clean(void)
{
	do_file_clean(0, "file clean");
}
400 
/* Clean file page, mlocked mapping. */
static void file_clean_mlocked(void)
{
	do_file_clean(MAP_LOCKED, "file clean mlocked");
}
405 
/* Compose "<name> <add>" into buf (must hold >= 100 bytes); returns buf. */
static char *ndesc(char *buf, char *name, char *add)
{
	snprintf(buf, 100, "%s %s", name, add);

	return buf;
}
411 
/*
 * Poison dirty file-backed pages in several states: freshly written and
 * mapped, re-faulted from disk with and without MAP_POPULATE, and then
 * accessed through plain read()/write() on the fd.
 */
static void do_file_dirty(int flags, char *name)
{
	char nbuf[100];
	char *page;
	char fn[PATHBUFLEN];
	fn[0] = 0;	/* ask playfile() to generate a name */
	int fd = playfile(fn);

	page = checked_mmap(NULL, PS, PROT_READ,
			MAP_SHARED|MAP_POPULATE|flags, fd, 0);
	testmem(ndesc(nbuf, name, "initial"), page, MREAD);
	/* the dirty data was lost, so writeback must report an error */
	expecterr("msync expect error", msync(page, PS, MS_SYNC) < 0);
	close(fd);
	munmap_reserve(page, PS);

	fd = open(fn, O_RDONLY);
	if (fd < 0) err("reopening temp file");
	page = checked_mmap(NULL, PS, PROT_READ, MAP_SHARED|MAP_POPULATE|flags,
				fd, 0);
	/* the poisoned page was dropped, so a fresh read must succeed */
	recover(ndesc(nbuf, name, "populated"), page, MREAD_OK);
	close(fd);
	munmap_reserve(page, PS);

	fd = open(fn, O_RDONLY);
	if (fd < 0) err("reopening temp file");
	page = checked_mmap(NULL, PS, PROT_READ, MAP_SHARED|flags, fd, 0);
	recover(ndesc(nbuf, name, "fault"), page, MREAD_OK);
	close(fd);
	munmap_reserve(page, PS);

	fd = open(fn, O_RDWR);
	char buf[128];
	/* the earlier close has eaten the error */
	optionalerr("explicit read after poison", read(fd, buf, sizeof buf) < 0);
	optionalerr("explicit write after poison", write(fd, "foobar", 6) < 0);
	optionalerr("fsync expect error", fsync(fd) < 0);
	close(fd);

	/* should unlink return an error here? */
	if (unlink(fn) < 0)
		perror("unlink");
}
454 
/* Dirty file page, default mapping flags. */
static void file_dirty(void)
{
	do_file_dirty(0, "file dirty");
}
459 
/* Dirty file page, mlocked mapping. */
static void file_dirty_mlocked(void)
{
	do_file_dirty(MAP_LOCKED, "file dirty mlocked");
}
464 
465 /* TBD */
file_hole(void)466 static void file_hole(void)
467 {
468 	int fd = tempfd();
469 	char *page;
470 
471 	ftruncate(fd, PS);
472 	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
473 	*page = 1;
474 	testmem("hole file dirty", page, MREAD);
475 	/* hole error reporting doesn't work in kernel currently, so optional */
476 	optionalerr("hole fsync expect error", fsync(fd) < 0);
477 	optionalerr("hole msync expect error", msync(page, PS, MS_SYNC) < 0);
478 	close(fd);
479 }
480 
/*
 * Poison a page mapped through remap_file_pages() so that the file
 * offset of the poisoned page differs from its mapping offset (the
 * pages are remapped in reverse order).
 */
static void nonlinear(void)
{
	int fd;
	const int NPAGES = 10;
	int i;
	char *page;
	char *tmp;

	fd = tempfd();
	tmp = xmalloc(PS);
	for (i = 0; i < NPAGES; i++)  {
		memset(tmp, i, PS);
		write(fd, tmp, PS);
	}
	free(tmp);
	page = checked_mmap(NULL, PS*NPAGES, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	/* rewire the mapping so page i shows file page NPAGES-1-i */
	int k = NPAGES - 1;
	for (i = 0; i < NPAGES; i++, k--) {
		if (remap_file_pages(page + i*PS, PS, 0, k, 0))
			perror("remap_file_pages");
	}
	*page = 1;
	testmem("rfp file dirty", page, MREAD);
	expecterr("rfp fsync expect error", fsync(fd) < 0);
	optionalerr("rfp msync expect error", msync(page, PS, MS_SYNC) < 0);
	close(fd);
}
508 
509 /*
510  * These tests are currently too racy to be enabled.
511  */
512 
513 /*
514  * This is quite timing dependent. The sniper might hit the page
515  * before it is dirtied. If that happens tweak the delay
516  * (should auto tune)
517  */
/* Sniper delay before poisoning; quite timing dependent (see above). */
enum {
	DELAY_NS = 30,
};

/* Handshake state shared between the test thread and the sniper thread. */
volatile enum sstate { START, WAITING, SNIPE } sstate;
523 
/* Publish state w, then spin until the peer advances the state to s. */
void waitfor(enum sstate w, enum sstate s)
{
	sstate = w;
	mb();	/* make the store visible before polling */
	while (sstate != s)
		cpu_relax();
}
531 
/* Arguments handed to the sniper thread. */
struct poison_arg {
	char *msg;		/* test case description */
	char *page;		/* page to poison */
	enum rmode mode;	/* expected access mode/outcome */
};
537 
/*
 * Sniper thread body: wait for the main thread's go-ahead, sleep
 * briefly so the racing operation can get started, then poison the
 * target page.
 */
void *sniper(void *p)
{
	struct poison_arg *arg = p;

	waitfor(START, WAITING);
	nanosleep(&((struct timespec) { .tv_nsec = DELAY_NS }), NULL);
	poison(arg->msg, arg->page, arg->mode);
	return NULL;
}
547 
setup_sniper(struct poison_arg * arg)548 int setup_sniper(struct poison_arg *arg)
549 {
550 	if (sysconf(_SC_NPROCESSORS_ONLN) < 2)  {
551 		printf("%s: Need at least two CPUs. Not tested\n", arg->msg);
552 		return -1;
553 	}
554 	sstate = START;
555 	mb();
556 	pthread_t thr;
557 	if (pthread_create(&thr, NULL, sniper, arg) < 0)
558 		err("pthread_create");
559 	pthread_detach(thr);
560 	return 0;
561 }
562 
/*
 * Race poisoning of a dirty shared file page against an fsync() of the
 * same file; the writeback must report the error.
 */
static void under_io_dirty(void)
{
	struct poison_arg arg;
	int fd = tempfd();
	char *page;

	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, fd, 0);

	arg.page = page;
	arg.msg  = "under io dirty";
	arg.mode = MWRITE;
	if (setup_sniper(&arg) < 0)
		return;

	write(fd, "xyz", 3);
	/* release the sniper, then start the racing writeback */
	waitfor(WAITING, WAITING);
	expecterr("write under io", fsync(fd) < 0);
	close(fd);
}
582 
under_io_clean(void)583 static void under_io_clean(void)
584 {
585 	struct poison_arg arg;
586 	char fn[PATHBUFLEN];
587 	int fd;
588 	char *page;
589 	char buf[10];
590 
591  	fd = playfile(fn);
592 	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, fd, 0);
593 	madvise(page, PS, MADV_DONTNEED);
594 
595 	arg.page = page;
596 	arg.msg  = "under io clean";
597 	arg.mode = MREAD_OK;
598 	if (setup_sniper(&arg) < 0)
599 		return;
600 
601 	waitfor(WAITING, WAITING);
602 	// what is correct here?
603 	if (pread(fd, buf, 10, 0) != 0)
604 		perror("pread under io clean");
605 	close(fd);
606 }
607 
608 /*
609  * semaphore get/put wrapper
610  */
/* P operation: decrement semaphore 0 with SEM_UNDO; returns semop() result. */
int get_semaphore(int sem_id, struct sembuf *sembuffer)
{
	sembuffer->sem_flg = SEM_UNDO;
	sembuffer->sem_num = 0;
	sembuffer->sem_op  = -1;
	return semop(sem_id, sembuffer, 1);
}
618 
/* V operation: increment semaphore 0 with SEM_UNDO; returns semop() result. */
int put_semaphore(int sem_id, struct sembuf *sembuffer)
{
	sembuffer->sem_flg = SEM_UNDO;
	sembuffer->sem_num = 0;
	sembuffer->sem_op  = 1;
	return semop(sem_id, sembuffer, 1);
}
626 
/* memory sharing mode used by do_shared() */
enum shared_mode {
	MMAP_SHARED = 0,	/* MAP_SHARED|MAP_ANONYMOUS mapping */
	IPV_SHARED  = 1,	/* SysV shared memory segment */
};
632 
633 /*
634  * testcase for shared pages, where
635  *  if early_kill == 0, parent access the shared page hwpoisoned by child, and
636  *  if early_kill == 1, parent will be killed by SIGBUS from child.
637  * This testcase checks whether if a shared page is hwpoisoned by one process,
638  * another process sharing the page will be killed expectedly.
639  */
do_shared(int shared_mode)640 static void do_shared(int shared_mode)
641 {
642 	int shm_id = -1, sem_id = -1, semaphore;
643 	pid_t pid;
644 	char *shared_page = NULL;
645 	struct sembuf sembuffer;
646 
647 	if (shared_mode == MMAP_SHARED) {
648 		shared_page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE,
649 				MAP_SHARED|MAP_ANONYMOUS|MAP_POPULATE, 0, 0);
650 	} else if (shared_mode == IPV_SHARED) {
651 		shm_id = shmget(IPC_PRIVATE, PS, 0666|IPC_CREAT);
652 		if (shm_id == -1)
653 			err("shmget");
654 	} else {
655 		printf("XXX: invalid shared_mode\n");
656 		return;
657 	}
658 
659 	if (early_kill) {
660 		sem_id = semget(IPC_PRIVATE, 1, 0666|IPC_CREAT);
661 		if (sem_id == -1) {
662 			perror("semget");
663 			goto cleanup;
664 		}
665 		semaphore = semctl(sem_id, 0, SETVAL, 1);
666 		if (semaphore == -1) {
667 			perror("semctl");
668 			goto cleanup;
669 		}
670 		if (get_semaphore(sem_id, &sembuffer)) {
671 			perror("get_semaphore");
672 			goto cleanup;
673 		}
674 	}
675 
676 	pid = fork();
677 	if (pid < 0) {
678 		perror("fork");
679 		goto cleanup;
680 	}
681 
682 	if (shared_mode == IPV_SHARED) {
683 		shared_page = shmat(shm_id, NULL, 0);
684 		if (shared_page == (char *)-1) {
685 			perror("shmat");
686 			goto cleanup;
687 		}
688 	}
689 
690 	memset(shared_page, 'a', 3);
691 
692 	if (early_kill) {
693 		struct sigaction sa = {
694 			.sa_sigaction = sighandler,
695 			.sa_flags = SA_SIGINFO
696 		};
697 		sigaction(SIGBUS, &sa, NULL);
698 		expected_addr = shared_page;
699 	}
700 
701 	if (pid) {
702 		siginfo_t sig;
703 
704 		if (early_kill && sigsetjmp(early_recover_ctx, 1) == 0) {
705 			if (put_semaphore(sem_id, &sembuffer))
706 				err("get_semaphore");
707 			/* waiting for SIGBUS from child */
708 			sleep(10);
709 			printf("XXX timeout: child process does not send signal\n");
710 			failure++;
711 			goto cleanup;
712 		}
713 		waitid(P_PID, pid, &sig, WEXITED);
714 
715 		/*
716 		 * check child termination status
717 		 * late kill       : child should exit
718 		 * suicide version : child should be killed by signal
719 		 * early kill      : child should be killed by signal
720 		 */
721 		if (!early_kill) {
722 			struct sigaction sigact;
723 			sigaction(SIGBUS, NULL, &sigact);
724 
725 			if (sigact.sa_handler == SIG_DFL) {/* suicide version */
726 				if (sig.si_code != CLD_KILLED)
727 					goto child_error;
728 			} else { /* late kill */
729 				if (sig.si_code != CLD_EXITED)
730 					goto child_error;
731 			}
732 		} else { /* early kill */
733 			if (sig.si_code != CLD_EXITED)
734 				goto child_error;
735 		}
736 
737 		if (!early_kill)
738 			recover("ipv shared page (parent)",
739 				shared_page, MWRITE);
740 
741 		if (shared_mode == IPV_SHARED && shmdt(shared_page) == -1) {
742 			perror("shmdt");
743 			goto cleanup;
744 		}
745 	}
746 
747 	if (!pid) {
748 		failure = 0;
749 
750 		if (early_kill)
751 			if (get_semaphore(sem_id, &sembuffer))
752 				err("get_semaphore");
753 		testmem("ipv shared page", shared_page, MWRITE);
754 
755 		if (shared_mode == IPV_SHARED && shmdt(shared_page) == -1)
756 			err("shmdt");
757 
758 		fflush(stdout);
759 		_exit(failure);
760 	}
761 
762 cleanup:
763 	if (shared_page) {
764 		if (shared_mode == IPV_SHARED)
765 			shmdt(shared_page);
766 		else
767 			munmap_reserve(shared_page, PS);
768 	}
769 	if (shm_id >= 0 && shmctl(shm_id, IPC_RMID, NULL) < 0)
770 		err("shmctl IPC_RMID");
771 	if (sem_id >= 0 && semctl(sem_id, 0, IPC_RMID) < 0)
772 		err("semctl IPC_RMID");
773 	return;
774 
775 child_error:
776 	printf("XXX child process was terminated unexpectedly\n");
777 	failure++;
778 	goto cleanup;
779 }
780 
/* Shared page via anonymous MAP_SHARED mapping. */
static void mmap_shared(void)
{
	do_shared(MMAP_SHARED);
}
785 
/* Shared page via SysV shared memory segment. */
static void ipv_shared(void)
{
	do_shared(IPV_SHARED);
}
790 
anonymous_hugepage(void)791 static void anonymous_hugepage(void)
792 {
793 	char *page;
794 	/* Hugepage isn't supported. */
795 	if (!HPS)
796 		return;
797 	test_hugepage = 1;
798 	page = alloc_anonymous_hugepage(HPS, 1);
799 	/* prefault */
800 	page[0] = 'a';
801 	testmem("anonymous hugepage", page, MWRITE);
802 	free_anonymous_hugepage(page, HPS);
803 	test_hugepage = 0;
804 }
805 
file_backed_hugepage(void)806 static void file_backed_hugepage(void)
807 {
808 	char *page;
809 	char buf[PATHBUFLEN];
810 	int fd;
811 	/* Hugepage isn't supported. */
812 	if (!HPS)
813 		return;
814 	test_hugepage = 1;
815 	snprintf(buf, PATHBUFLEN, "%s/test%d", hugetlbfsdir, tmpcount++);
816 	page = alloc_filebacked_hugepage(buf, HPS, 0, &fd);
817 	/* prefault */
818 	page[0] = 'a';
819 	testmem("file backed hugepage", page, MWRITE);
820 	free_filebacked_hugepage(page, HPS, fd, buf);
821 	test_hugepage = 0;
822 }
823 
shm_hugepage(void)824 static void shm_hugepage(void)
825 {
826 	char *page;
827 	/* Hugepage isn't supported. */
828 	if (!HPS)
829 		return;
830 	test_hugepage = 1;
831 	page = alloc_shm_hugepage(&tmpcount, HPS);
832 	/* prefault */
833 	page[0] = 'a';
834 	testmem("shared memory hugepage", page, MWRITE);
835 	free_shm_hugepage(tmpcount, page);
836 	tmpcount++;
837 	test_hugepage = 0;
838 }
839 
/* A test case: entry point, display name, and whether the process is
   expected to survive the access when SIGBUS is left at SIG_DFL. */
struct testcase {
	void (*f)(void);
	char *name;
	int survivable;
} cases[] = {
	{ dirty_anonymous, "dirty anonymous" },
	{ dirty_anonymous_unmap, "dirty anonymous unmap" },
	{ mlocked_anonymous, "mlocked anonymous" },
	{ file_clean, "file clean", 1 },
	{ file_dirty, "file dirty" },
	{ file_hole, "file hole" },
	{ file_clean_mlocked, "file clean mlocked", 1 },
	{ file_dirty_mlocked, "file dirty mlocked"},
	{ nonlinear, "nonlinear" },
	{ mmap_shared, "mmap shared" },
	{ ipv_shared, "ipv shared" },
	{ anonymous_hugepage, "anonymous hugepage" },
	{ file_backed_hugepage, "file backed hugepage" },
	{ shm_hugepage, "shared memory hugepage" },
	/* empty slots that --sniper fills in via handle_opts() */
	{},	/* dummy 1 for sniper */
	{},	/* dummy 2 for sniper */
	{}	/* terminator */
};
863 
/* Racy cases appended into the dummy slots of cases[] by --sniper. */
struct testcase snipercases[] = {
	{ under_io_dirty, "under io dirty" },
	{ under_io_clean, "under io clean" },
};
868 
/* Print command line help and exit with status 1. */
void usage(void)
{
	/* synopsis now lists --mce-inject, which was documented below
	   but missing from the Usage line */
	fprintf(stderr, "Usage: tinjpage [--mce-inject] [--sniper]\n"
			"Test hwpoison injection on pages in various states\n"
			"--mce-inject    Use mce-inject for injection\n"
			"--sniper  Enable racy sniper tests (likely broken)\n");
	exit(1);
}
877 
handle_opts(char ** av)878 void handle_opts(char **av)
879 {
880 	while (*++av) {
881 		if (!strcmp(*av, "--sniper")) {
882 			struct testcase *t;
883 			for (t = cases; t->f; t++)
884 				;
885 			*t++ = snipercases[0];
886 			*t++ = snipercases[1];
887 		}
888 		else if (!strcmp(*av, "--mce-inject")) {
889 			inject = inject_mce_inject;
890 		} else
891 			usage();
892 	}
893 }
894 
/*
 * Run every test case three times: with late kill and a SIGBUS handler,
 * in a child with SIG_DFL (the "suicide" version, which must die), and
 * with early kill enabled.  Exits non-zero if any case failed.
 */
int main(int ac, char **av)
{
	if (av[1])
		handle_opts(av);

	PS = getpagesize();
	/* hugepage cases only run when a hugetlbfs mount is available */
	if (hugetlbfs_root(hugetlbfsdir))
		HPS = gethugepagesize();

	/* don't kill me at poison time, but possibly at page fault time */
	early_kill = 0;
	system("sysctl -w vm.memory_failure_early_kill=0");

	struct sigaction sa = {
		.sa_sigaction = sighandler,
		.sa_flags = SA_SIGINFO
	};

	struct testcase *t;
	/* catch signals */
	sigaction(SIGBUS, &sa, NULL);
	for (t = cases; t->f; t++) {
		printf("---- testing %s\n", t->name);
		t->f();
	}

	/* suicide version: the child resets SIGBUS to SIG_DFL, so a
	   non-survivable case must be killed by the signal */
	for (t = cases; t->f; t++) {
		printf("---- testing %s in child\n", t->name);
		pid_t child = fork();
		if (child == 0) {
			signal(SIGBUS, SIG_DFL);
			t->f();
			if (t->survivable)
				_exit(2);
			write(1, t->name, strlen(t->name));
			write(1, PAIR(" didn't kill itself?\n"));
			_exit(1);
		} else {
			siginfo_t sig;
			if (waitid(P_PID, child, &sig, WEXITED) < 0)
				perror("waitid");
			else {
				if (t->survivable) {
					if (sig.si_code != CLD_EXITED) {
						printf("XXX: %s: child not survived\n", t->name);
						failure++;
					}
				} else {
					if (sig.si_code != CLD_KILLED || sig.si_status != SIGBUS) {
						printf("XXX: %s: child not killed by SIGBUS\n", t->name);
						failure++;
					}
				}
			}
		}
	}

	/* early kill version */
	early_kill = 1;
	system("sysctl -w vm.memory_failure_early_kill=1");

	sigaction(SIGBUS, &sa, NULL);
	for (t = cases; t->f; t++) {
		printf("---- testing %s (early kill)\n", t->name);
		t->f();
	}

	if (failure > 0) {
		printf("FAILURE -- %d cases broken!\n", failure);
		return 1;
	}
	printf("SUCCESS\n");
	return 0;
}
970