• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/bin/bash
# randomly soft offline pages
# random_offline options
# -t seconds   runtime in seconds (default unlimited)
# -m max-pages maximum pages to tie up before unpoisoning
# -s seed      random seed
# Note: running this for too long may still run out of memory
# because unpoison cannot completely undo what soft offline
# does to larger free memory areas (TBD in the kernel)
# Author: Andi Kleen

# fixme: without -s the seed is the current time, so runs are not reproducible

#mount -t debugfs none /debug

# Tunables. THRESH, SEED and RUNTIME are overridden by -m, -s and -t.
# THRESH:  number of offline attempts between two unpoison passes
# SEED:    random seed; empty means "use the current time"
# RUNTIME: runtime limit in seconds; empty means unlimited
# DEBUG:   debugfs mount point
THRESH=1000
SEED=""
RUNTIME=""
DEBUG=/sys/kernel/debug

# Report a fatal error and terminate the script.
# Arguments: the message words; they are joined with spaces.
# Outputs:   "ERROR: <message>" on stderr
# Exits with status 1 so that callers can tell the run did not succeed.
fail() {
	echo "ERROR: $*" >&2
	exit 1
}

# Print the option summary and abort through fail.
# Arguments: $1 - the offending option, repeated in the error message
# printf is used because bash's echo does not treat "--" as an end-of-options
# marker and would print it literally in front of every option line.
usage() {
	printf '%s\n' \
		"Usage:" \
		"random_offline options" \
		"-t seconds   runtime in seconds (default unlimited)" \
		"-m max-pages maximum pages to tie up before unpoisoning" \
		"-s seed      random seed"
	fail "Invalid option $1"
}

# Parse the command line into RUNTIME (-t), THRESH (-m) and SEED (-s).
# Arguments: the script's positional parameters
# An unknown option or a non-numeric -t/-m value is handed to usage.
parse_options() {
	local option OPTARG OPTIND=1

	while getopts "t:m:s:" option ; do
		case "$option" in
		t)
			# RUNTIME is later compared with [ -gt ], so it must be an integer.
			[[ $OPTARG =~ ^[0-9]+$ ]] || usage "-t $OPTARG"
			RUNTIME=$OPTARG
			;;
		m)
			# THRESH is later compared with [ -gt ], so it must be an integer.
			[[ $OPTARG =~ ^[0-9]+$ ]] || usage "-m $OPTARG"
			THRESH=$OPTARG
			;;
		s) SEED=$OPTARG ;;
		# Quoted on purpose: getopts reports errors as "?", which would
		# otherwise expand to any one-character file name in the cwd.
		*) usage "$option" ;;
		esac
	done
}

parse_options "$@"

# Preconditions: the caller must be root, the hwpoison directory must exist
# under $DEBUG (debugfs is mounted there if it does not), and both the soft
# offline and the unpoison control files must be writable.
# Root is detected by uid so that a uid-0 account with another name passes.
[ "$(id -u)" -eq 0 ] || fail "Not root"
[ -d "$DEBUG/hwpoison" ] || mount -t debugfs none "$DEBUG"
[ -d "$DEBUG/hwpoison" ] || fail "No debugfs"
[ -w /sys/devices/system/memory/soft_offline_page ] || fail "No soft offlining support in kernel"
[ -w "$DEBUG/hwpoison/unpoison-pfn" ] || fail "no unpoison support in kernel"

# Print the highest page frame number that belongs to "System RAM".
# Arguments: $1 - firmware memory map directory (default /sys/firmware/memmap)
# Outputs:   the largest "end" address of all System RAM entries divided by
#            4096, in decimal; nothing when no such entry is found
# Returns:   0 when a value was printed, 1 otherwise
end_of_memory() {
	local memmap=${1:-/sys/firmware/memmap}
	local entry kind end pfn max=""

	for entry in "$memmap"/* ; do
		# Skips an unmatched glob as well as incomplete entries.
		[ -r "$entry/type" ] && [ -r "$entry/end" ] || continue

		kind=$(< "$entry/type")
		[ "$kind" = "System RAM" ] || continue

		end=$(< "$entry/end")
		# Only plain hex digits may reach the arithmetic expansion below.
		[[ $end =~ ^(0x)?[0-9a-fA-F]+$ ]] || continue

		pfn=$(( 16#${end#0x} / 4096 ))
		if [ -z "$max" ] || (( pfn > max )) ; then
			max=$pfn
		fi
	done

	[ -n "$max" ] || return 1
	echo "$max"
}

# Exclusive upper bound for the random page frame numbers picked later on.
E=$(end_of_memory)
# Without a positive bound the modulo in the offline loop cannot work.
case "$E" in
''|*[!0-9]*|0) fail "cannot determine end of memory" ;;
esac

echo "soft offlining pages upto $E"

# Unpoison every page address listed in ./offlined.
# Globals: DEBUG (read); utotal and usuccess (incremented per page)
# Files:   reads ./offlined, one address per line; for every page that could
#          not be unpoisoned "<address> <exit status>" is appended to
#          ./unpoison-failed
# Does nothing when ./offlined does not exist.
unpoison() {
	local addr rc

	if [ ! -f offlined ] ; then
		return
	fi

	echo unpoisioning
	while read -r addr ; do
		(( utotal++ ))
		# unpoison-pfn is given the address without its trailing "000".
		if echo "${addr%000}" > "$DEBUG/hwpoison/unpoison-pfn" ; then
			(( usuccess++ ))
		else
			# Save the status first; every later command overwrites $?.
			rc=$?
			echo "$addr $rc" >> unpoison-failed
			echo "unpoisioning $addr failed: $rc"
		fi
	done < offlined
	echo done
	echo
}

# Run unpoison from the EXIT trap when the shell terminates.
trap unpoison EXIT

# Fall back to the current time when no seed was supplied.
if [ -z "$SEED" ] ; then
	SEED=$(date +%s)
fi
RANDOM=$SEED
echo "Using random seed $SEED"

# Start time in seconds since the epoch, used for the -t runtime limit.
start=$(date +%s)

# Soft offline statistics, and the matching unpoison statistics (u prefix).
failed=0
ufailed=0
success=0
usuccess=0
total=0
utotal=0

# HardwareCorrupted line of /proc/meminfo before any page is offlined.
cbefore=$(grep HardwareCorrupted /proc/meminfo)


# Choose a random page below the end of memory.
# Globals: E (read)    - number of page frames to choose from
#          T (written) - byte address of the chosen page, 0x-prefixed hex
# Returns: 1, leaving T untouched, when E is not a positive integer
# Call this in the current shell, never inside $(...): bash reseeds RANDOM in
# a command substitution, which would make the seed set by -s ineffective.
# Three 15-bit RANDOM draws are combined so that every page below E can be
# reached instead of only the first 32768.
pick_random_page() {
	local pfn max

	case "$E" in
	''|*[!0-9]*) return 1 ;;
	esac
	# 10# keeps a leading zero from being read as octal.
	max=$(( 10#$E ))
	(( max > 0 )) || return 1

	pfn=$(( ((RANDOM << 30) | (RANDOM << 15) | RANDOM) % max ))
	printf -v T '0x%X' "$(( pfn * 4096 ))"
}

# Main loop: soft offline random pages, unpoison them again after every
# THRESH attempts, and stop once RUNTIME seconds have passed (if set).
(( k = 0 ))
rm -f offlined unpoison-failed
while true ; do
	if ! pick_random_page ; then
		echo "ERROR: invalid end of memory '$E'" >&2
		break
	fi
	(( total++ ))
	# A failed write is counted as a failed attempt; its error text is
	# discarded.
	if echo 2>/dev/null "$T" >/sys/devices/system/memory/soft_offline_page ; then
		echo "$T" >> offlined
		(( success++ ))
	else
		(( failed++ ))
	fi

	(( k++ ))
	if [ "$k" -gt "$THRESH" ] ; then
		unpoison
		(( k = 0 ))
		# -f: offlined does not exist when no page could be offlined.
		rm -f offlined
	fi

	if [ -n "$RUNTIME" ] ; then
		(( DIFF = $(date +%s) - start ))
		if [ "$DIFF" -gt "$RUNTIME" ] ; then
			echo time over
			# NOTE(review): this clears the EXIT trap, so pages offlined
			# since the last unpoison pass are not unpoisoned on this
			# path - confirm that this is intended.
			trap 0
			break
		fi
	fi
done

# Print the final statistics.
# Globals: success, failed, total, usuccess, utotal, cbefore (read);
#          ufailed (set to the line count of ./unpoison-failed if it exists)
# Outputs: the soft-poison and unpoison counters, followed by the
#          HardwareCorrupted line of /proc/meminfo before and after the run
print_summary() {
	if [ -f unpoison-failed ] ; then
		# The arithmetic expansion strips any padding wc may add.
		ufailed=$(( $(wc -l < unpoison-failed) ))
	fi
	echo "soft-poison: success $success failed $failed of total $total"
	echo "unpoison-failed: success $usuccess failed $ufailed of total $utotal"
	echo "poisoned before: $cbefore"
	echo -n "poisoned after: "
	grep HardwareCorrupted /proc/meminfo
}

print_summary

### xxx automatic success/failure criteria?
