#
# MCE library: provide MCE specific functions
#
# Copyright (C) 2008, Intel Corp.
#   Author: Huang Ying <ying.huang@intel.com>
#
# This file is released under the GPLv2.
#

# extract_mce_from_log LOG OUTF
#
# Delete the leading part of LOG up to and including the "HARDWARE ERROR"
# line matched by the sed range, feed the rest to mcelog and write its
# raw-ascii dump to OUTF.
extract_mce_from_log()
{
    [ $# -eq 2 ] || die "missing parameter for extract_mce_from_log"
    local log="$1"
    local outf="$2"

    sed '1,/HARDWARE ERROR/d' "$log" | \
        mcelog --no-dmi --dump-raw-ascii --ascii > "$outf"
}

# mce_reformat ORG OUTF
#
# Write the output of "mce-inject --dump ORG" to OUTF.
mce_reformat()
{
    [ $# -eq 2 ] || die "missing parameter for mce_reformat"
    local org="$1"
    local outf="$2"

    mce-inject --dump "$org" > "$outf"
}

# mce_reformat_for_cmp INF OUTF [REMOVES]
#
# Produce a sorted, one-record-per-line form of INF in OUTF so that two
# MCE dumps can be compared with diff.  REMOVES is an optional
# whitespace-separated list of sed patterns; every dump line matching one
# of them is dropped before the records are joined.  Records whose STATUS
# is one of the two values filtered below are dropped as well.
# Uses $WDIR/mce_reformat_for_cmp as scratch file.
mce_reformat_for_cmp()
{
    [ $# -ge 2 ] || die "missing parameter for mce_reformat_for_cmp"
    local inf="$1"
    local outf="$2"
    local removes="${3:-}"
    local remove

    local tmpf="$WDIR/mce_reformat_for_cmp"

    mce-inject --dump "$inf" > "$tmpf"

    if [ -n "$removes" ]; then
        # $removes is intentionally unquoted: it is split into patterns.
        for remove in $removes; do
            sed -i "/$remove/d" "$tmpf"
        done
    fi

    # Join all lines with '#', then split again where an empty line was
    # (two consecutive '#'), so that each record ends up on a single line.
    tr '\n' '#' < "$tmpf" | sed '1,$s/##/\n/g' | \
        grep -v '#STATUS 0x0#' | \
        grep -v '#STATUS 0x800000000000000#' | sort > "$outf"
}

# mce_cmp M1 M2 REMOVES
#
# Normalize M1 and M2 with mce_reformat_for_cmp (passing REMOVES through)
# and compare the results.  The exit status is that of diff: 0 when the
# normalized dumps are identical.
mce_cmp()
{
    [ $# -eq 3 ] || die "missing parameter for mce_cmp"
    local m1="$1"
    local m2="$2"
    local removes="$3"

    local tmpf1="$WDIR/mce_cmp_1"
    local tmpf2="$WDIR/mce_cmp_2"

    mce_reformat_for_cmp "$m1" "$tmpf1" "$removes"
    mce_reformat_for_cmp "$m2" "$tmpf2" "$removes"
    diff "$tmpf1" "$tmpf2" > /dev/null
}

# get_mcelog_from_dev MCELOG_RESULT
#
# Store the output of "mcelog --dump-raw-ascii" in MCELOG_RESULT and print
# a failure line if mcelog exits with an error.
get_mcelog_from_dev()
{
    [ $# -eq 1 ] || die "missing parameter for get_mcelog_from_dev"
    local mcelog_result="$1"

    if ! mcelog --dump-raw-ascii > "$mcelog_result"; then
        echo " Failed: can not get mce log from /dev/mcelog"
    fi
}

# extract mcelog from kernel log
#
# get_mcelog_from_klog KLOG MCELOG_RESULT
#
# Print a failure line when KLOG is not a regular file or when
# extract_mce_from_log fails on it.
get_mcelog_from_klog()
{
    [ $# -eq 2 ] || die "missing parameter for get_mcelog_from_klog"
    local klog="$1"
    local mcelog_result="$2"

    if ! [ -f "$klog" ] || ! extract_mce_from_log "$klog" "$mcelog_result"; then
        echo " Failed: Can not extract mcelog from console log"
    fi
}

# mcelog_filter INF PAT
#
# Print the records of "mce-inject --dump INF" (one record per line, fields
# separated by '#') that match the grep pattern PAT.  The exit status is
# that of grep.
mcelog_filter()
{
    [ $# -eq 2 ] || die "missing parameter for mcelog_filter"
    local inf="$1"
    local pat="$2"

    mce-inject --dump "$inf" | tr '\n' '#' | sed '1,$s/##/\n/g' | \
        grep -e "$pat"
}

# chk_gcov
#
# Return 0 when $GCOV is set, the gcov reset file exists in debugfs and a
# gcov command is available; return 1 otherwise.
chk_gcov()
{
    if [ -z "$GCOV" ]; then
        return 1
    fi

    if [ -f /sys/kernel/debug/gcov/reset ] && command -v gcov > /dev/null; then
        return 0
    else
        return 1
    fi
}

# reset_gcov
#
# Reset the kernel gcov counters when GCOV=copy; nothing to do for
# GCOV=dump or when GCOV is empty.  Any other value is reported as failure.
reset_gcov()
{
    if [ -z "$GCOV" ]; then
        return
    fi
    case $GCOV in
    copy)
        echo 1 > /sys/kernel/debug/gcov/reset
        ;;
    dump)
        true
        ;;
    *)
        echo " Failed: can not reset gcov, invalid GCOV=$GCOV"
        return
        ;;
    esac
}

# get_gcov SRC_PATH
#
# Generate the gcov report for SRC_PATH (a path relative to $KSRC_DIR) and
# copy it to $RDIR/$this_case.  With GCOV=copy the *.gc* files found under
# the debugfs gcov tree are first copied next to the sources.  Does nothing
# when GCOV is empty.
get_gcov()
{
    [ $# -eq 1 ] || die "missing parameter for get_gcov"
    local src_path="$1"
    local src_fn src_dir abs_dir f

    # Assign separately from "local" so a failing command is not masked.
    src_fn=$(basename "$src_path")
    src_dir=$(dirname "$src_path")
    if [ -z "$GCOV" ]; then
        return
    fi
    case $GCOV in
    copy)
        # Resolve the physical source directory; give up instead of
        # silently falling back to the current directory when cd fails.
        if ! abs_dir=$(cd -P "$KSRC_DIR/$src_dir" && pwd); then
            echo " Failed: can not get gcov graph"
            return
        fi
        for f in "/sys/kernel/debug/gcov/$abs_dir"/*.gc*; do
            # Skip the literal pattern left behind by an unmatched glob.
            [ -e "$f" ] || continue
            cat "$f" > "$abs_dir/$(basename "$f")"
        done
        ;;
    dump)
        true
        ;;
    *)
        echo " Failed: can not get gcov path, invalid GCOV=$GCOV"
        return
        ;;
    esac
    if ! (cd "$KSRC_DIR" && gcov -o "$src_dir" "$src_fn" > /dev/null 2>&1) || \
        ! [ -s "$KSRC_DIR/$src_fn.gcov" ]; then
        echo " Failed: can not get gcov graph"
        return
    fi
    cp "$KSRC_DIR/$src_fn.gcov" "$RDIR/$this_case"
}

# reset_severity_cov
#
# Write 1 to the MCE severities-coverage file in debugfs.
reset_severity_cov()
{
    echo 1 > /sys/kernel/debug/mce/severities-coverage
}

# get_severity_cov
#
# Copy the MCE severities-coverage file to $RDIR/$this_case, or print a
# failure line when the file does not exist.
get_severity_cov()
{
    local sev_cor=/sys/kernel/debug/mce/severities-coverage

    if [ ! -f "$sev_cor" ]; then
        echo " Failed: can not get severities_coverage"
        return
    fi
    cp "$sev_cor" "$RDIR/$this_case"
}

# verify_klog KLOG
#
# Print a Passed/Failed line depending on whether check_kern_warning_bug
# (defined outside this file) succeeds on KLOG; a missing KLOG is a failure.
verify_klog()
{
    [ $# -eq 1 ] || die "missing parameter for verify_klog"
    local klog="$1"

    if [ -f "$klog" ]; then
        if check_kern_warning_bug "$klog"; then
            echo " Failed: kernel warning or bug during MCE"
        else
            echo " Passed: No kernel warning or bug"
        fi
    else
        echo " Failed: no kernel log"
    fi
}

# verify_panic_via_klog KLOG MCE_PANIC
#
# Print a Passed line when some line of KLOG containing "panic" also
# matches MCE_PANIC, a Failed line otherwise.  Returns 1 when KLOG does
# not exist.
verify_panic_via_klog()
{
    [ $# -eq 2 ] || die "missing parameter for verify_panic"
    local klog="$1"
    local mce_panic="$2"

    if [ ! -f "$klog" ]; then
        echo " Failed: no kernel log for checking panic"
        # "return -1" is not a valid exit status in every shell.
        return 1
    fi

    if grep "panic" "$klog" | grep -e "$mce_panic" > /dev/null; then
        echo " Passed: correct panic"
    else
        echo " Failed: uncorrect panic, expected: $mce_panic"
    fi
}

# verify_timeout_via_klog KLOG
#
# Print a Passed line when KLOG contains the CPU synchronization timeout
# message, a Failed line otherwise.  Returns 1 when KLOG does not exist.
verify_timeout_via_klog()
{
    [ $# -eq 1 ] || die "missing parameter for verify_timeout"
    local klog="$1"

    if [ ! -f "$klog" ]; then
        echo " Failed: No kernel log for checking timeout"
        return 1
    fi

    if grep "Some CPUs didn't answer in synchronization" "$klog" \
        > /dev/null; then
        echo " Passed: timeout detected"
    else
        echo " Failed: no timeout detected"
    fi
}

# verify_exp_via_klog KLOG EXP...
#
# Print a Passed line as soon as one of the EXP patterns matches a
# "Machine check: " line of KLOG; print a Failed line listing all expected
# patterns when none does.  Returns 1 when KLOG does not exist.
verify_exp_via_klog()
{
    [ $# -ge 2 ] || die "missing parameter for verify_exp_via_klog"
    local klog="$1"
    local exp
    shift

    if [ ! -f "$klog" ]; then
        echo " Failed: No kernel log for checking MCE exp"
        return 1
    fi

    for exp in "$@"; do
        if grep "Machine check: " "$klog" | grep -e "$exp" > /dev/null; then
            echo " Passed: correct MCE exp"
            return
        fi
    done
    # Report every alternative that was tried, not just the last one.
    echo " Failed: uncorrected MCE exp, expected: $*"
}

# get_panic_from_mcelog MCELOG
#
# Take the first record of MCELOG with "BANK 219", read its MISC value and
# print the panic message that value maps to ("unknown panic ..." for any
# other value).  Uses $WDIR/get_panic_from_mcelog as scratch file.
get_panic_from_mcelog()
{
    [ $# -eq 1 ] || die "missing parameter for get_panic_from_mcelog"
    local mcelog="$1"
    local tmpf="$WDIR/get_panic_from_mcelog"
    local F

    # The pipeline status is that of head, so the body also runs when no
    # record matched; F is then empty and the default branch is taken.
    if mcelog_filter "$mcelog" "#BANK 219#" | head -1 > "$tmpf"; then
        F="$(sed '1,$s/#/\n/g' "$tmpf" | awk '/MISC / { print $2 }')"
        case "$F" in
        0x1) echo "Fatal machine check" ;;
        0x2) echo "Machine check from unknown source" ;;
        0x3) echo "Uncorrected data corruption machine check" ;;
        0x4) echo "Fatal machine check" ;;
        *) echo unknown panic $F ;;
        esac
    fi
}

# verify_panic_msg PANIC_MSG MCE_PANIC
#
# Print a Passed line when ": PANIC_MSG" matches the grep pattern
# MCE_PANIC, a Failed line otherwise.
verify_panic_msg()
{
    [ $# -eq 2 ] || die "missing parameter for verify_panic_msg"
    local panic_msg="$1"
    local mce_panic="$2"

    if echo ": $panic_msg" | grep -e "$mce_panic" > /dev/null 2>&1; then
        echo " Passed: correct panic"
    else
        echo " Failed: uncorrect panic, expected: $mce_panic"
    fi
}

# verify_timeout_via_mcelog MCELOG
#
# Print a Passed line when MCELOG contains a record with "BANK 218",
# a Failed line otherwise.
verify_timeout_via_mcelog()
{
    [ $# -eq 1 ] || die "missing parameter for verify_timeout"
    local mcelog="$1"

    if mcelog_filter "$mcelog" "#BANK 218#" > /dev/null 2>&1; then
        echo " Passed: timeout detected"
    else
        echo " Failed: no timeout detected"
    fi
}

# set_tolerant VALUE
#
# Write VALUE to the machinecheck0 "tolerant" sysfs attribute.
set_tolerant()
{
    [ $# -eq 1 ] || die "missing parameter for set_tolerant"
    echo -n "$1" > /sys/devices/system/machinecheck/machinecheck0/tolerant
}

# get_tolerant
#
# Print the current value of the machinecheck0 "tolerant" sysfs attribute.
get_tolerant()
{
    cat /sys/devices/system/machinecheck/machinecheck0/tolerant
}

# check_debugfs
#
# Make sure something is mounted on /sys/kernel/debug, trying to mount
# debugfs there if needed; die when that does not work.
check_debugfs()
{
    if ! mount | grep /sys/kernel/debug > /dev/null 2>&1; then
        mount -t debugfs none /sys/kernel/debug
    fi
    # Use "if" rather than "[ ... ] && die" so that the function returns 0
    # on the success path.
    if ! mount | grep /sys/kernel/debug > /dev/null 2>&1; then
        die "Kernel without debugfs support ?"
    fi
}

# should be called after check_debugfs
#
# check_mce
#
# Set the global DEBUGFS to the first debugfs mount point reported by
# mount and die when it has no "mce" directory.
check_mce()
{
    DEBUGFS=$(mount | grep debugfs | cut -d ' ' -f3 | head -1)
    if [ ! -d "${DEBUGFS}/mce" ]; then
        die "Kernel without CONFIG_X86_MCE_INJECT ?"
    fi
}

# set_fake_panic VALUE
#
# After check_debugfs and check_mce, write VALUE to the MCE fake_panic
# file in debugfs.
set_fake_panic()
{
    check_debugfs
    check_mce
    [ $# -eq 1 ] || die "missing parameter for set_fake_panic"
    echo -n "$1" > /sys/kernel/debug/mce/fake_panic
}

# set_panic_on_oops VALUE
#
# Write VALUE to /proc/sys/kernel/panic_on_oops.
set_panic_on_oops()
{
    [ $# -eq 1 ] || die "missing parameter for set_panic_on_oops"
    echo -n "$1" > /proc/sys/kernel/panic_on_oops
}