#!/bin/bash
# shellcheck disable=SC1090
# shellcheck disable=SC1091
# shellcheck disable=SC2086 # we want word splitting
# shellcheck disable=SC2155
6
7# Second-stage init, used to set up devices and our job environment before
8# running tests.
9
10shopt -s extglob
11
# Kill every process this script started in the background before exiting:
# any console output they produce may interfere with LAVA's signal handling,
# which is based on the console log.
#
# Globals:
#   BACKGROUND_PIDS (read, then cleared) - space separated PIDs to terminate
# Outputs:
#   a single notice on stdout when there is something to kill
# Returns:
#   0; failing to signal a PID (e.g. because it is already gone) is ignored
cleanup() {
  if [ "$BACKGROUND_PIDS" = "" ]; then
    return 0
  fi

  # Remember whether command tracing was enabled so that it is restored to
  # that state afterwards instead of being switched on unconditionally.
  local xtrace_was_on=false
  case $- in
    *x*) xtrace_was_on=true ;;
  esac
  local pid

  set +x
  echo "Killing all child processes"
  for pid in $BACKGROUND_PIDS; do
    kill "$pid" 2>/dev/null || true
  done

  # Sleep just a little to give the subprocesses enough time to terminate
  # gracefully, then send SIGKILL to whatever may be left.
  sleep 5
  for pid in $BACKGROUND_PIDS; do
    kill -9 "$pid" 2>/dev/null || true
  done

  BACKGROUND_PIDS=
  if [ "$xtrace_was_on" = true ]; then
    set -x
  fi
}
# Run cleanup when the script is interrupted, terminated, or exits.
trap cleanup INT TERM EXIT

# Space separated list of the PIDs of the processes this script has started
# in the background.
BACKGROUND_PIDS=

# Source whichever of the candidate environment files exist, then the common
# test environment setup.
for path in /dut-env-vars.sh /set-job-env-vars.sh ./set-job-env-vars.sh; do
    if [ -f "$path" ]; then
        . "$path"
    fi
done
source "$SCRIPTS_DIR/setup-test-env.sh"
49
# From here on: abort on the first failing command and trace what is run.
set -e -x

# Set up any devices required by the jobs. HWCI_KERNEL_MODULES is split on
# commas and modprobe is invoked once per entry.
if [ -n "$HWCI_KERNEL_MODULES" ]; then
    echo -n $HWCI_KERNEL_MODULES | xargs -d, -n1 /usr/sbin/modprobe
fi
56
# Set up ZRAM-backed swap.
# `zramctl --find` selects the first unused zram device and prints its path,
# so format and enable that device rather than assuming it is /dev/zram0.
HWCI_ZRAM_SIZE=2G
if ZRAM_DEVICE=$(/sbin/zramctl --find --size $HWCI_ZRAM_SIZE -a zstd); then
    # Fall back to the first device should zramctl succeed without output.
    ZRAM_DEVICE=${ZRAM_DEVICE:-/dev/zram0}
    mkswap "$ZRAM_DEVICE"
    swapon "$ZRAM_DEVICE"
    echo "zram: $HWCI_ZRAM_SIZE activated"
else
    echo "zram: skipping, not supported"
fi
66
#
# When HWCI_KVM is "true", load the KVM module matching the virtualization
# extension advertised in /proc/cpuinfo:
# - vmx (Intel VT) -> kvm_intel
# - svm (AMD-V)    -> kvm_amd
#
# Then download the kernel image used to boot the VM via HWCI_TEST_SCRIPT.
#
if [ "$HWCI_KVM" = "true" ]; then
    unset KVM_KERNEL_MODULE
    if grep -qs '\bvmx\b' /proc/cpuinfo; then
        KVM_KERNEL_MODULE=kvm_intel
    elif grep -qs '\bsvm\b' /proc/cpuinfo; then
        KVM_KERNEL_MODULE=kvm_amd
    fi

    # Only warn when no extension was detected; the download still happens.
    if [ -z "${KVM_KERNEL_MODULE}" ]; then
        echo "WARNING: Failed to detect CPU virtualization extensions"
    else
        modprobe ${KVM_KERNEL_MODULE}
    fi

    mkdir -p /lava-files
    curl -L --retry 4 -f --retry-all-errors --retry-delay 60 \
        -o "/lava-files/${KERNEL_IMAGE_NAME}" \
        "${KERNEL_IMAGE_BASE}/amd64/${KERNEL_IMAGE_NAME}"
fi
93
# The build installs into $CI_PROJECT_DIR/install, but everything here expects
# it in /install: bridge the two with a symlink.
ln -sf $CI_PROJECT_DIR/install /install

# Library search path: the install prefix first, then /usr/local/lib. The
# latter is listed explicitly because the navi21 boards seem to have trouble
# with ld.so.cache, see
# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22495#note_1876691
export LD_LIBRARY_PATH=/install/lib:/usr/local/lib
export LIBGL_DRIVERS_PATH=/install/lib/dri

# Keep Mesa's disk cache under /tmp, rather than sending it out over NFS.
export XDG_CACHE_HOME=/tmp

# Make sure Python can find all our imports. Assignment and export stay in one
# statement on purpose, so that a failing python3 does not trip `set -e`.
export PYTHONPATH=$(python3 -c "import sys;print(\":\".join(sys.path))")
110
111# If we need to specify a driver, it means several drivers could pick up this gpu;
112# ensure that the other driver can't accidentally be used
113if [ -n "$MESA_LOADER_DRIVER_OVERRIDE" ]; then
114  rm /install/lib/dri/!($MESA_LOADER_DRIVER_OVERRIDE)_dri.so
115fi
116ls -l /install/lib/dri/*_dri.so
117
# With HWCI_FREQ_MAX=true, pin GPU and CPU frequency/power settings for the
# duration of the job.
if [ "$HWCI_FREQ_MAX" = "true" ]; then
  # Ensure initialization of the DRM device (needed by MSM)
  head -0 /dev/dri/renderD128

  # Disable GPU frequency scaling. find may report more than one GPU governor
  # node; each one is written separately, because a redirection whose target
  # expands to several words is an error in bash and would set none of them.
  DEVFREQ_GOVERNOR=$(find /sys/devices -name governor | grep gpu || true)
  for governor in $DEVFREQ_GOVERNOR; do
    echo performance > "$governor" || true
  done

  # Disable CPU frequency scaling
  echo performance | tee -a /sys/devices/system/cpu/cpufreq/policy*/scaling_governor || true

  # Disable GPU runtime power management (first matching node only)
  GPU_AUTOSUSPEND=$(find /sys/devices -name autosuspend_delay_ms | grep gpu | head -1)
  test -z "$GPU_AUTOSUSPEND" || echo -1 > "$GPU_AUTOSUSPEND" || true

  # Lock Intel GPU frequency to 70% of the maximum allowed by hardware
  # and enable throttling detection & reporting.
  # Additionally, set the upper limit for CPU scaling frequency to 65% of the
  # maximum permitted, as an additional measure to mitigate thermal throttling.
  /intel-gpu-freq.sh -s 70% --cpu-set-max 65% -g all -d
fi
138
# Start a little daemon to capture sysfs records and produce a JSON file.
# Its PID is recorded so that cleanup can stop it later.
if [ ! -x /kdl.sh ]; then
  echo "kdl.sh not found!"
else
  echo "launch kdl.sh!"
  /kdl.sh &
  BACKGROUND_PIDS="$! $BACKGROUND_PIDS"
fi
147
# Increase freedreno hangcheck timer because it's right at the edge of the
# spilling tests timing out (and some traces, too). Only done when
# FREEDRENO_HANGCHECK_MS is set.
[ -z "$FREEDRENO_HANGCHECK_MS" ] ||
    echo $FREEDRENO_HANGCHECK_MS | tee -a /sys/kernel/debug/dri/128/hangcheck_period_ms
153
# Start a little daemon to capture the first devcoredump we encounter.  (They
# expire after 5 minutes, so we poll for them). Its PID is recorded so that
# cleanup can stop it later.
[ ! -x /capture-devcoredump.sh ] || {
  /capture-devcoredump.sh &
  BACKGROUND_PIDS="$! $BACKGROUND_PIDS"
}
160
# When Xorg is wanted for the test it is started here, ahead of
# HWCI_TEST_SCRIPT: X has to be launched through xinit (otherwise, without
# -displayfd, you can race with Xorg's startup), and xinit would eat the
# client's return code.
if [ -n "$HWCI_START_XORG" ]; then
  # The xinit client only drops a marker file and then idles.
  echo "touch /xorg-started; sleep 100000" > /xorg-script
  env \
    VK_ICD_FILENAMES="/install/share/vulkan/icd.d/${VK_DRIVER}_icd.$(uname -m).json" \
    xinit /bin/sh /xorg-script -- /usr/bin/Xorg -noreset -s 0 -dpms -logfile /Xorg.0.log &
  BACKGROUND_PIDS="$! $BACKGROUND_PIDS"

  # Wait for xorg to be ready for connections: up to five checks for the
  # marker file, five seconds apart.
  for _ in 1 2 3 4 5; do
    [ ! -e /xorg-started ] || break
    sleep 5
  done
  export DISPLAY=:0
fi
181
# With HWCI_START_WESTON set, start a headless Weston with Xwayland in the
# background and wait until its X11 socket exists.
if [ -n "$HWCI_START_WESTON" ]; then
  # Xwayland gets display :0, unless Xorg was started as well, in which case
  # the socket to wait for is the one of display :1.
  WESTON_X11_SOCK="/tmp/.X11-unix/X0"
  if [ -n "$HWCI_START_XORG" ]; then
    echo "Please consider dropping HWCI_START_XORG and instead using Weston XWayland for testing."
    WESTON_X11_SOCK="/tmp/.X11-unix/X1"
  fi
  export WAYLAND_DISPLAY=wayland-0

  # Display server is Weston Xwayland when HWCI_START_XORG is not set or Xorg when it's
  export DISPLAY=:0
  mkdir -p /tmp/.X11-unix

  env \
    VK_ICD_FILENAMES="/install/share/vulkan/icd.d/${VK_DRIVER}_icd.$(uname -m).json" \
    weston -Bheadless-backend.so --use-gl -Swayland-0 --xwayland --idle-time=0 &
  WESTON_PID=$!
  BACKGROUND_PIDS="$WESTON_PID $BACKGROUND_PIDS"

  # Wait for the Xwayland socket, but give up as soon as weston itself is
  # gone instead of polling forever for a socket that will never appear.
  while [ ! -S "$WESTON_X11_SOCK" ]; do
    if ! kill -0 "$WESTON_PID" 2>/dev/null; then
      echo "weston exited before creating $WESTON_X11_SOCK"
      exit 1
    fi
    sleep 1
  done
fi
201
# Run the test script in a child bash, after sourcing setup-test-env.sh there,
# and record its exit status without letting a failure abort this script.
if bash -c ". $SCRIPTS_DIR/setup-test-env.sh && $HWCI_TEST_SCRIPT"; then
  EXIT_CODE=0
else
  EXIT_CODE=$?
fi
set -e

# Let's make sure the results are always stored in current working directory
mv -f ${CI_PROJECT_DIR}/results ./ 2>/dev/null || true

# On success, remove results/trace/$PIGLIT_REPLAY_DEVICE_NAME.
if [ ${EXIT_CODE} -eq 0 ]; then
  rm -rf results/trace/"$PIGLIT_REPLAY_DEVICE_NAME"
fi
211
# Stop the background helpers before tarring up artifacts: if
# capture-devcoredump is still writing while tar is reading, tar will throw an
# error and kill the job.
cleanup

# Upload artifacts, but only when an S3 destination has been configured.
[ -z "$S3_RESULTS_UPLOAD" ] || {
  tar --zstd -cf results.tar.zst results/
  ci-fairy s3cp --token-file "${CI_JOB_JWT_FILE}" results.tar.zst https://"$S3_RESULTS_UPLOAD"/results.tar.zst
}
222
# The "hwci: mesa" message still has to be echoed, as some scripts rely on it,
# such as the python ones inside the bare-metal folder.
if [ "${EXIT_CODE}" -eq 0 ]; then
  RESULT=pass
else
  RESULT=fail
fi

set +x

# Print the final result four times, one second apart: both bare-metal and
# LAVA look for this string to get the result of our run, so try really hard
# to get it out rather than losing the run. The device gets shut down right at
# this point, and a630 seems to enjoy corrupting the last line of serial
# output before shutdown.
for _ in 0 1 2 3; do
  echo "hwci: mesa: $RESULT"
  sleep 1
  echo
done

exit $EXIT_CODE
236