#!/bin/bash
# shellcheck disable=SC1090
# shellcheck disable=SC1091
# shellcheck disable=SC2086 # we want word splitting
# shellcheck disable=SC2155

# Second-stage init, used to set up devices and our job environment before
# running tests.

# Extended globs are needed for the !(...) pattern used later in this script.
shopt -s extglob

# Terminate every background process started by this script. Stray console
# output from those children may interfere with LAVA's signal handling, which
# is based on the console log, so they have to be gone when the script exits.
#
# Globals:
#   BACKGROUND_PIDS (read, then cleared) - space-separated PIDs to terminate
# Outputs:
#   A one-line notice on stdout when there is something to kill.
# Returns:
#   0; failing to signal a PID (e.g. because it already exited) is ignored.
cleanup() {
  if [ "$BACKGROUND_PIDS" = "" ]; then
    return 0
  fi

  local pid
  local xtrace_was_on=

  # Remember whether tracing was enabled so that it is only re-enabled for
  # callers that had it on, instead of being forced on unconditionally (for
  # instance when a signal arrives while tracing is switched off).
  case $- in
    *x*) xtrace_was_on=1 ;;
  esac

  set +x
  echo "Killing all child processes"
  for pid in $BACKGROUND_PIDS; do
    kill "$pid" 2>/dev/null || true
  done

  # Sleep just a little to give the subprocesses enough time to terminate
  # gracefully, then follow up with SIGKILL for anything still around.
  sleep 5
  for pid in $BACKGROUND_PIDS; do
    kill -9 "$pid" 2>/dev/null || true
  done

  BACKGROUND_PIDS=
  # An `if` (rather than `[ ... ] && set -x`) keeps the return status at 0
  # when tracing stays off, so callers running under `set -e` are not aborted.
  if [ -n "$xtrace_was_on" ]; then
    set -x
  fi
}
trap cleanup INT TERM EXIT
39
# PIDs of the processes this script starts in the background, separated by
# spaces. cleanup() walks this list.
BACKGROUND_PIDS=


# Pull in the environment files from whichever of the known locations exist,
# then load setup-test-env.sh from $SCRIPTS_DIR.
for env_script in /dut-env-vars.sh /set-job-env-vars.sh ./set-job-env-vars.sh; do
    if [ -f "$env_script" ]; then
        source "$env_script"
    fi
done
source "$SCRIPTS_DIR/setup-test-env.sh"
49
# Emit three blank lines to flush out anything that might be stuck in a
# serial buffer.
printf '\n\n\n'

section_switch init_stage2 "Pre-testing hardware setup"

# From here on: abort on the first failing command and trace every command.
set -e -x
58
# Load the kernel modules required by the job. HWCI_KERNEL_MODULES is split on
# commas and every entry is handed to its own modprobe invocation.
if [ -n "$HWCI_KERNEL_MODULES" ]; then
    echo -n $HWCI_KERNEL_MODULES | xargs -d, -n1 /usr/sbin/modprobe
fi
63
# Set up ZRAM-backed swap.
#
# `zramctl --find` prints the path of the device it picked. That is not
# necessarily /dev/zram0 (zram0 may already be in use), so the reported path
# is captured and used for mkswap/swapon rather than assuming the first
# device. /dev/zram0 remains the fallback if nothing was printed.
HWCI_ZRAM_SIZE=2G
if HWCI_ZRAM_DEV=$(/sbin/zramctl --find --size $HWCI_ZRAM_SIZE -a zstd); then
    HWCI_ZRAM_DEV=${HWCI_ZRAM_DEV:-/dev/zram0}
    mkswap "$HWCI_ZRAM_DEV"
    swapon "$HWCI_ZRAM_DEV"
    echo "zram: $HWCI_ZRAM_SIZE activated"
else
    echo "zram: skipping, not supported"
fi
73
#
# Load the KVM module specific to the detected CPU virtualization extensions:
# - vmx for Intel VT
# - svm for AMD-V
#
# Additionally, download the kernel image to boot the VM via HWCI_TEST_SCRIPT.
#
if [ "$HWCI_KVM" = "true" ]; then
    unset KVM_KERNEL_MODULE
    if grep -qs '\bvmx\b' /proc/cpuinfo; then
        KVM_KERNEL_MODULE=kvm_intel
    elif grep -qs '\bsvm\b' /proc/cpuinfo; then
        KVM_KERNEL_MODULE=kvm_amd
    fi

    # Explicit if/else instead of `{ test && echo; } || modprobe`: with the
    # chained form a failing echo would fall through to modprobe with an empty
    # module name.
    if [ -z "${KVM_KERNEL_MODULE}" ]; then
        echo "WARNING: Failed to detect CPU virtualization extensions"
    else
        modprobe ${KVM_KERNEL_MODULE}
    fi

    # Fetch the VM kernel image into /lava-files, retrying on any error.
    mkdir -p /lava-files
    curl -L --retry 4 -f --retry-all-errors --retry-delay 60 \
        -o "/lava-files/${KERNEL_IMAGE_NAME}" \
        "${KERNEL_IMAGE_BASE}/amd64/${KERNEL_IMAGE_NAME}"
fi
100
# The build installs into $CI_PROJECT_DIR/install while everything here
# expects /install, so bridge the two with a symlink.
ln -sf $CI_PROJECT_DIR/install /install

# Library search path: the install prefix first, then /usr/local/lib. The
# latter is listed explicitly because the navi21 boards seem to have trouble
# with ld.so.cache, see
# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22495#note_1876691
LD_LIBRARY_PATH=/install/lib:/usr/local/lib
LIBGL_DRIVERS_PATH=/install/lib/dri
# Keep Mesa's disk cache under /tmp rather than sending it out over NFS.
XDG_CACHE_HOME=/tmp
export LD_LIBRARY_PATH LIBGL_DRIVERS_PATH XDG_CACHE_HOME

# Make sure Python can find all our imports: export the interpreter's own
# sys.path, joined with colons. Assignment and export stay in one statement
# on purpose, so a python3 failure does not trip `set -e`.
export PYTHONPATH=$(python3 -c 'import sys;print(":".join(sys.path))')
117
118# If we need to specify a driver, it means several drivers could pick up this gpu;
119# ensure that the other driver can't accidentally be used
120if [ -n "$MESA_LOADER_DRIVER_OVERRIDE" ]; then
121  rm /install/lib/dri/!($MESA_LOADER_DRIVER_OVERRIDE)_dri.so
122fi
123ls -1 /install/lib/dri/*_dri.so || true
124
if [ "$HWCI_FREQ_MAX" = "true" ]; then
  # Ensure initialization of the DRM device (needed by MSM)
  head -0 /dev/dri/renderD128

  # GPU: switch the devfreq governor to "performance" to disable frequency
  # scaling. A missing governor or a failed write is tolerated.
  DEVFREQ_GOVERNOR=$(find /sys/devices -name governor | grep gpu || true)
  if [ -n "$DEVFREQ_GOVERNOR" ]; then
    echo performance > $DEVFREQ_GOVERNOR || true
  fi

  # CPU: same for every cpufreq policy, again on a best-effort basis.
  printf '%s\n' performance | tee -a /sys/devices/system/cpu/cpufreq/policy*/scaling_governor || true

  # Disable GPU runtime power management on the first matching device.
  GPU_AUTOSUSPEND=$(find /sys/devices -name autosuspend_delay_ms | grep gpu | head -1)
  if [ -n "$GPU_AUTOSUSPEND" ]; then
    echo -1 > $GPU_AUTOSUSPEND || true
  fi

  # Lock Intel GPU frequency to 70% of the maximum allowed by hardware
  # and enable throttling detection & reporting.
  # Additionally, set the upper limit for CPU scaling frequency to 65% of the
  # maximum permitted, as an additional measure to mitigate thermal throttling.
  /install/common/intel-gpu-freq.sh -s 70% --cpu-set-max 65% -g all -d
fi
145
# Launch kdl.sh in the background, if it is installed and executable; it is a
# little daemon that captures sysfs records and produces a JSON file. Its PID
# is recorded so that cleanup() can stop it.
KDL_PATH=/install/common/kdl.sh
if [ ! -x "$KDL_PATH" ]; then
  echo "kdl.sh not found!"
else
  echo "launch kdl.sh!"
  $KDL_PATH &
  BACKGROUND_PIDS="$! $BACKGROUND_PIDS"
fi
155
# Raise the freedreno hangcheck period when the job asks for it: the default
# is right at the edge of the spilling tests (and some traces) timing out.
[ -z "$FREEDRENO_HANGCHECK_MS" ] ||
    echo $FREEDRENO_HANGCHECK_MS | tee -a /sys/kernel/debug/dri/128/hangcheck_period_ms
161
# Launch the helper that captures the first devcoredump we encounter. Dumps
# expire after 5 minutes, hence a polling daemon. Its PID is recorded so that
# cleanup() can stop it.
CAPTURE_DEVCOREDUMP=/install/common/capture-devcoredump.sh
[ ! -x "$CAPTURE_DEVCOREDUMP" ] || {
  $CAPTURE_DEVCOREDUMP &
  BACKGROUND_PIDS="$! $BACKGROUND_PIDS"
}
169
# Export the ICD manifest path for the selected Vulkan driver; the file name
# embeds the machine architecture reported by uname.
ARCH="$(uname -m)"
VK_DRIVER_FILES="/install/share/vulkan/icd.d/${VK_DRIVER}_icd.${ARCH}.json"
export VK_DRIVER_FILES
172
# xinit is used to launch Xorg (without -displayfd a client can race with
# Xorg's startup), but xinit eats its client's return code. So when Xorg is
# wanted it is brought up here, ahead of HWCI_TEST_SCRIPT, with a placeholder
# client.
if [ -n "$HWCI_START_XORG" ]; then
  # The placeholder client creates a marker file once it runs, then idles.
  printf '%s\n' "touch /xorg-started; sleep 100000" > /xorg-script
  env xinit /bin/sh /xorg-script -- \
    /usr/bin/Xorg -noreset -s 0 -dpms -logfile "$RESULTS_DIR/Xorg.0.log" &
  BACKGROUND_PIDS="$! $BACKGROUND_PIDS"

  # Wait for the marker: up to five checks, five seconds apart. The script
  # carries on even if the marker never shows up.
  for _ in 1 2 3 4 5; do
    [ ! -e /xorg-started ] || break
    sleep 5
  done
  export DISPLAY=:0
fi
192
# Start a headless Weston with XWayland when requested and block until its
# X11 socket exists.
if [ -n "$HWCI_START_WESTON" ]; then
  # With Xorg already running, the socket waited for is X1 instead of X0.
  if [ -z "$HWCI_START_XORG" ]; then
    WESTON_X11_SOCK="/tmp/.X11-unix/X0"
  else
    echo "Please consider dropping HWCI_START_XORG and instead using Weston XWayland for testing."
    WESTON_X11_SOCK="/tmp/.X11-unix/X1"
  fi

  # DISPLAY=:0 is Weston's XWayland when HWCI_START_XORG is not set, and Xorg
  # when it is.
  export WAYLAND_DISPLAY=wayland-0 DISPLAY=:0
  mkdir -p /tmp/.X11-unix

  env weston -Bheadless-backend.so --use-gl -Swayland-0 --xwayland --idle-time=0 &
  BACKGROUND_PIDS="$! $BACKGROUND_PIDS"

  # Poll once per second until the X11 socket shows up.
  until [ -S "$WESTON_X11_SOCK" ]; do
    sleep 1
  done
fi
211
# Hardware setup is done: stop tracing and close the init_stage2 section.
set +x

section_end init_stage2

echo "Running ${HWCI_TEST_SCRIPT} ${HWCI_TEST_ARGS} ..."

# Run the test with errexit switched off so that a failing test does not
# abort this script; its status is kept in EXIT_CODE for later use.
set +e
$HWCI_TEST_SCRIPT ${HWCI_TEST_ARGS:-}
EXIT_CODE=$?
set -e
221
section_start post_test_cleanup "Cleaning up after testing, uploading results"
set -x

# Stop the background helpers before archiving: if capture-devcoredump is
# still writing while tar is reading, tar will throw an error and kill the
# job.
cleanup

# Upload artifacts: pack results/ and copy the archive to the S3 location
# named by S3_RESULTS_UPLOAD, if one was given.
[ -z "$S3_RESULTS_UPLOAD" ] || {
  tar --zstd -cf results.tar.zst results/
  ci-fairy s3cp --token-file "${S3_JWT_FILE}" results.tar.zst https://"$S3_RESULTS_UPLOAD"/results.tar.zst
}
235
# The "hwci: mesa" message still has to be echoed because some scripts rely on
# it, such as the Python ones inside the bare-metal folder. Map the test's
# exit code onto the pass/fail word used in that message.
if [ ${EXIT_CODE} -eq 0 ]; then
  RESULT=pass
else
  RESULT=fail
fi

set +x
section_end post_test_cleanup

# Print the final result; both bare-metal and LAVA look for this string to get
# the result of our run, so try really hard to get it out rather than losing
# the run: the line is printed four times, one second apart. The device gets
# shut down right at this point, and a630 seems to enjoy corrupting the last
# line of serial output before shutdown.
for _ in 0 1 2 3; do
  echo "hwci: mesa: $RESULT, exit_code: $EXIT_CODE"
  sleep 1
  echo
done

exit $EXIT_CODE
250