1#!/bin/bash 2# shellcheck disable=SC1090 3# shellcheck disable=SC1091 4# shellcheck disable=SC2086 # we want word splitting 5# shellcheck disable=SC2155 6 7# Second-stage init, used to set up devices and our job environment before 8# running tests. 9 10shopt -s extglob 11 12# Make sure to kill itself and all the children process from this script on 13# exiting, since any console output may interfere with LAVA signals handling, 14# which based on the log console. 15cleanup() { 16 if [ "$BACKGROUND_PIDS" = "" ]; then 17 return 0 18 fi 19 20 set +x 21 echo "Killing all child processes" 22 for pid in $BACKGROUND_PIDS 23 do 24 kill "$pid" 2>/dev/null || true 25 done 26 27 # Sleep just a little to give enough time for subprocesses to be gracefully 28 # killed. Then apply a SIGKILL if necessary. 29 sleep 5 30 for pid in $BACKGROUND_PIDS 31 do 32 kill -9 "$pid" 2>/dev/null || true 33 done 34 35 BACKGROUND_PIDS= 36 set -x 37} 38trap cleanup INT TERM EXIT 39 40# Space separated values with the PIDS of the processes started in the 41# background by this script 42BACKGROUND_PIDS= 43 44 45for path in '/dut-env-vars.sh' '/set-job-env-vars.sh' './set-job-env-vars.sh'; do 46 [ -f "$path" ] && source "$path" 47done 48. "$SCRIPTS_DIR"/setup-test-env.sh 49 50# Flush out anything which might be stuck in a serial buffer 51echo 52echo 53echo 54 55section_switch init_stage2 "Pre-testing hardware setup" 56 57set -ex 58 59# Set up any devices required by the jobs 60[ -z "$HWCI_KERNEL_MODULES" ] || { 61 echo -n $HWCI_KERNEL_MODULES | xargs -d, -n1 /usr/sbin/modprobe 62} 63 64# Set up ZRAM 65HWCI_ZRAM_SIZE=2G 66if /sbin/zramctl --find --size $HWCI_ZRAM_SIZE -a zstd; then 67 mkswap /dev/zram0 68 swapon /dev/zram0 69 echo "zram: $HWCI_ZRAM_SIZE activated" 70else 71 echo "zram: skipping, not supported" 72fi 73 74# 75# Load the KVM module specific to the detected CPU virtualization extensions: 76# - vmx for Intel VT 77# - svm for AMD-V 78# 79# Additionally, download the kernel image to boot the VM via HWCI_TEST_SCRIPT. 80# 81if [ "$HWCI_KVM" = "true" ]; then 82 unset KVM_KERNEL_MODULE 83 { 84 grep -qs '\bvmx\b' /proc/cpuinfo && KVM_KERNEL_MODULE=kvm_intel 85 } || { 86 grep -qs '\bsvm\b' /proc/cpuinfo && KVM_KERNEL_MODULE=kvm_amd 87 } 88 89 { 90 [ -z "${KVM_KERNEL_MODULE}" ] && \ 91 echo "WARNING: Failed to detect CPU virtualization extensions" 92 } || \ 93 modprobe ${KVM_KERNEL_MODULE} 94 95 mkdir -p /lava-files 96 curl -L --retry 4 -f --retry-all-errors --retry-delay 60 \ 97 -o "/lava-files/${KERNEL_IMAGE_NAME}" \ 98 "${KERNEL_IMAGE_BASE}/amd64/${KERNEL_IMAGE_NAME}" 99fi 100 101# Fix prefix confusion: the build installs to $CI_PROJECT_DIR, but we expect 102# it in /install 103ln -sf $CI_PROJECT_DIR/install /install 104export LD_LIBRARY_PATH=/install/lib 105export LIBGL_DRIVERS_PATH=/install/lib/dri 106 107# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22495#note_1876691 108# The navi21 boards seem to have trouble with ld.so.cache, so try explicitly 109# telling it to look in /usr/local/lib. 110export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib 111 112# Store Mesa's disk cache under /tmp, rather than sending it out over NFS. 113export XDG_CACHE_HOME=/tmp 114 115# Make sure Python can find all our imports 116export PYTHONPATH=$(python3 -c "import sys;print(\":\".join(sys.path))") 117 118# If we need to specify a driver, it means several drivers could pick up this gpu; 119# ensure that the other driver can't accidentally be used 120if [ -n "$MESA_LOADER_DRIVER_OVERRIDE" ]; then 121 rm /install/lib/dri/!($MESA_LOADER_DRIVER_OVERRIDE)_dri.so 122fi 123ls -1 /install/lib/dri/*_dri.so || true 124 125if [ "$HWCI_FREQ_MAX" = "true" ]; then 126 # Ensure initialization of the DRM device (needed by MSM) 127 head -0 /dev/dri/renderD128 128 129 # Disable GPU frequency scaling 130 DEVFREQ_GOVERNOR=$(find /sys/devices -name governor | grep gpu || true) 131 test -z "$DEVFREQ_GOVERNOR" || echo performance > $DEVFREQ_GOVERNOR || true 132 133 # Disable CPU frequency scaling 134 echo performance | tee -a /sys/devices/system/cpu/cpufreq/policy*/scaling_governor || true 135 136 # Disable GPU runtime power management 137 GPU_AUTOSUSPEND=$(find /sys/devices -name autosuspend_delay_ms | grep gpu | head -1) 138 test -z "$GPU_AUTOSUSPEND" || echo -1 > $GPU_AUTOSUSPEND || true 139 # Lock Intel GPU frequency to 70% of the maximum allowed by hardware 140 # and enable throttling detection & reporting. 141 # Additionally, set the upper limit for CPU scaling frequency to 65% of the 142 # maximum permitted, as an additional measure to mitigate thermal throttling. 143 /install/common/intel-gpu-freq.sh -s 70% --cpu-set-max 65% -g all -d 144fi 145 146# Start a little daemon to capture sysfs records and produce a JSON file 147KDL_PATH=/install/common/kdl.sh 148if [ -x "$KDL_PATH" ]; then 149 echo "launch kdl.sh!" 150 $KDL_PATH & 151 BACKGROUND_PIDS="$! $BACKGROUND_PIDS" 152else 153 echo "kdl.sh not found!" 154fi 155 156# Increase freedreno hangcheck timer because it's right at the edge of the 157# spilling tests timing out (and some traces, too) 158if [ -n "$FREEDRENO_HANGCHECK_MS" ]; then 159 echo $FREEDRENO_HANGCHECK_MS | tee -a /sys/kernel/debug/dri/128/hangcheck_period_ms 160fi 161 162# Start a little daemon to capture the first devcoredump we encounter. (They 163# expire after 5 minutes, so we poll for them). 164CAPTURE_DEVCOREDUMP=/install/common/capture-devcoredump.sh 165if [ -x "$CAPTURE_DEVCOREDUMP" ]; then 166 $CAPTURE_DEVCOREDUMP & 167 BACKGROUND_PIDS="$! $BACKGROUND_PIDS" 168fi 169 170ARCH=$(uname -m) 171export VK_DRIVER_FILES="/install/share/vulkan/icd.d/${VK_DRIVER}_icd.$ARCH.json" 172 173# If we want Xorg to be running for the test, then we start it up before the 174# HWCI_TEST_SCRIPT because we need to use xinit to start X (otherwise 175# without using -displayfd you can race with Xorg's startup), but xinit will eat 176# your client's return code 177if [ -n "$HWCI_START_XORG" ]; then 178 echo "touch /xorg-started; sleep 100000" > /xorg-script 179 env \ 180 xinit /bin/sh /xorg-script -- /usr/bin/Xorg -noreset -s 0 -dpms -logfile "$RESULTS_DIR/Xorg.0.log" & 181 BACKGROUND_PIDS="$! $BACKGROUND_PIDS" 182 183 # Wait for xorg to be ready for connections. 184 for _ in 1 2 3 4 5; do 185 if [ -e /xorg-started ]; then 186 break 187 fi 188 sleep 5 189 done 190 export DISPLAY=:0 191fi 192 193if [ -n "$HWCI_START_WESTON" ]; then 194 WESTON_X11_SOCK="/tmp/.X11-unix/X0" 195 if [ -n "$HWCI_START_XORG" ]; then 196 echo "Please consider dropping HWCI_START_XORG and instead using Weston XWayland for testing." 197 WESTON_X11_SOCK="/tmp/.X11-unix/X1" 198 fi 199 export WAYLAND_DISPLAY=wayland-0 200 201 # Display server is Weston Xwayland when HWCI_START_XORG is not set or Xorg when it's 202 export DISPLAY=:0 203 mkdir -p /tmp/.X11-unix 204 205 env \ 206 weston -Bheadless-backend.so --use-gl -Swayland-0 --xwayland --idle-time=0 & 207 BACKGROUND_PIDS="$! $BACKGROUND_PIDS" 208 209 while [ ! -S "$WESTON_X11_SOCK" ]; do sleep 1; done 210fi 211 212set +x 213 214section_end init_stage2 215 216echo "Running ${HWCI_TEST_SCRIPT} ${HWCI_TEST_ARGS} ..." 217 218set +e 219$HWCI_TEST_SCRIPT ${HWCI_TEST_ARGS:-}; EXIT_CODE=$? 220set -e 221 222section_start post_test_cleanup "Cleaning up after testing, uploading results" 223set -x 224 225# Make sure that capture-devcoredump is done before we start trying to tar up 226# artifacts -- if it's writing while tar is reading, tar will throw an error and 227# kill the job. 228cleanup 229 230# upload artifacts 231if [ -n "$S3_RESULTS_UPLOAD" ]; then 232 tar --zstd -cf results.tar.zst results/; 233 ci-fairy s3cp --token-file "${S3_JWT_FILE}" results.tar.zst https://"$S3_RESULTS_UPLOAD"/results.tar.zst; 234fi 235 236# We still need to echo the hwci: mesa message, as some scripts rely on it, such 237# as the python ones inside the bare-metal folder 238[ ${EXIT_CODE} -eq 0 ] && RESULT=pass || RESULT=fail 239 240set +x 241section_end post_test_cleanup 242 243# Print the final result; both bare-metal and LAVA look for this string to get 244# the result of our run, so try really hard to get it out rather than losing 245# the run. The device gets shut down right at this point, and a630 seems to 246# enjoy corrupting the last line of serial output before shutdown. 247for _ in $(seq 0 3); do echo "hwci: mesa: $RESULT, exit_code: $EXIT_CODE"; sleep 1; echo; done 248 249exit $EXIT_CODE 250