#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Copyright 2015, Daniel Axtens, IBM Corporation
#


# Locate the getscom/putscom xscom utilities.
# Prefer executables in the current directory; otherwise fall back to $PATH.
# Both tools must be found: the script reads with $GETSCOM and injects with
# $PUTSCOM, so finding only one of them is treated as "not found".
if [ -x ./getscom ] && [ -x ./putscom ]; then
	GETSCOM=./getscom
	PUTSCOM=./putscom
elif command -v getscom > /dev/null 2>&1 &&
     command -v putscom > /dev/null 2>&1; then
	# command -v is the POSIX way to resolve a utility's path.
	GETSCOM=$(command -v getscom)
	PUTSCOM=$(command -v putscom)
else
	cat <<EOF
Can't find getscom/putscom in . or \$PATH.
See https://github.com/open-power/skiboot.
The tool is in external/xscom-utils
EOF
	exit 1
fi

# We will get 8 HMI events per injection
# todo: deal with things being offline
expected_hmis=8

# COUNT_HMIS: print the number of dmesg lines that contain the
# 'Harmless Hypervisor Maintenance interrupt' message.
# Arguments: none
# Outputs:   the line count on stdout
# Note:      grep -c exits non-zero when the count is 0; callers should use
#            the printed number rather than the exit status.
COUNT_HMIS() {
	dmesg | grep -cF 'Harmless Hypervisor Maintenance interrupt'
}

# massively expand snooze delay, allowing injection on all cores
if ! ppc64_cpu --smt-snooze-delay=1000000000; then
	echo "Warning: failed to set smt-snooze-delay; continuing anyway." >&2
fi

# restore_snooze_delay: set the snooze delay back to 100.
restore_snooze_delay() {
	ppc64_cpu --smt-snooze-delay=100
}

# When we exit, restore it. The signal handlers only call exit so that the
# EXIT trap performs the restore exactly once and the script actually stops;
# a bare restore on HUP would let the script keep running with the delay
# already reset. Not every shell runs the EXIT trap when it is killed by an
# untrapped signal, hence the explicit INT/TERM handlers.
trap restore_snooze_delay EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# wait_for_hmis OLD_COUNT
# Poll COUNT_HMIS every 5 seconds, at most 12 times (about one minute), until
# $expected_hmis events have been seen on top of OLD_COUNT.
# Arguments: $1 - HMI count taken before the injection
# Outputs:   one progress line on stdout before each sleep
# Returns:   0 once the expected number of events has been seen, 1 on timeout
wait_for_hmis() {
	hmi_target=$(($1 + expected_hmis))
	polls=0
	new_hmis=$(COUNT_HMIS)
	while [ "$new_hmis" -lt "$hmi_target" ] && [ "$polls" -lt 12 ]; do
		echo "Seen $((new_hmis - $1)) HMI(s) out of $expected_hmis expected, sleeping"
		sleep 5
		polls=$((polls + 1))
		new_hmis=$(COUNT_HMIS)
	done
	# Decide on the final count rather than on the poll counter, so events
	# that show up on the very last poll still count as success.
	[ "$new_hmis" -ge "$hmi_target" ]
}

# for each chip+core combination
# todo - less fragile parsing
# Each matched line has the form "OCC: Chip <hex> Core <hex>", so read can
# split it directly: the third field is the chip, the fifth is the core.
# The loop is the tail of a pipeline, which shells commonly run in a subshell;
# "exit 1" below ends the loop and becomes the pipeline's exit status.
grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read -r _occ _chip_label chip _core_label core; do
	fir="0x1${core}013100"

	# verify that Core FIR is zero as expected
	if [ "$("$GETSCOM" -c "0x${chip}" "$fir")" != 0 ]; then
		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
		echo "Result of $GETSCOM -c 0x${chip} $fir:"
		"$GETSCOM" -c "0x${chip}" "$fir"
		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
		exit 1
	fi

	# keep track of the number of HMIs handled
	old_hmis=$(COUNT_HMIS)

	# do injection, adding a marker to dmesg for clarity
	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
	# inject a RegFile recoverable error
	if ! "$PUTSCOM" -c "0x${chip}" "$fir" 2000000000000000 > /dev/null; then
		echo "Error injecting. Aborting!"
		exit 1
	fi

	# now we want to wait for all the HMIs to be processed
	# we expect one per thread on the core
	if ! wait_for_hmis "$old_hmis"; then
		echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
		exit 1
	fi
	echo "Processed $expected_hmis events; presumed success. Check dmesg."
	echo ""
done
