"""
In a few words: some devices in Mesa CI have problematic serial connections;
they may hang (become silent) intermittently. Every time one hangs for minutes,
the job is retried, causing delays in the overall pipeline execution and
ultimately blocking legitimate MRs from merging.

To reduce reliance on UART, we explored LAVA features, such as running docker
containers as a test alongside the DUT one, to be able to create an SSH server
in the DUT as early as possible and an SSH client in a docker container, to
establish an SSH session between both, allowing the console output to be passed
via an SSH pseudo-terminal instead of relying on the error-prone UART.

In more detail, we aim to use "export -p" to share the initial boot environment
with SSH LAVA test-cases.
The "init-stage1.sh" script handles tasks such as system mounting and network
setup, which are necessary for allocating a pseudo-terminal under "/dev/pts".
Although these chores are not required for establishing an SSH session, they are
essential for the proper functioning of the target script given by the
HWCI_SCRIPT environment variable.

Therefore, we have divided the job definition into four parts:

1. [DUT] Logging in to the DUT and running the SSH server with root access.
2. [DUT] Running the "init-stage1.sh" script for the first SSH test case.
3. [DUT] Exporting the first boot environment to the `/dut-env-vars.sh` file.
4. [SSH] Enabling the pseudo-terminal for colors and running the "init-stage2.sh"
script after sourcing "dut-env-vars.sh" again for the second SSH test case.
"""


import re
from typing import TYPE_CHECKING, Any, Iterable

from ruamel.yaml.scalarstring import LiteralScalarString

from .constants import NUMBER_OF_ATTEMPTS_LAVA_BOOT

if TYPE_CHECKING:
    from ..lava_job_submitter import LAVAJobSubmitter

# Very early SSH server setup. Uses /dut_ready file to flag it is done.
SSH_SERVER_COMMANDS = {
    "auto_login": {
        # Commands sent right after login: start dropbear, then create the
        # /dut_ready flag file that the docker SSH client polls for.
        "login_commands": [
            "dropbear -R -B",
            "touch /dut_ready",
        ],
        "login_prompt": "ogin:",
        # To login as root, the username should be empty
        "username": "",
    }
}

# TODO: Extract this inline script to a shell file, like we do with
# init-stage[12].sh
# The current way is difficult to maintain because one has to deal with escaping
# characters for both Python and the resulting job definition YAML.
# Plus, it is always good to lint bash scripts with shellcheck.
DOCKER_COMMANDS = [
    """set -ex
timeout 1m bash << EOF
while [ -z "$(lava-target-ip)" ]; do
    echo Waiting for DUT to join LAN;
    sleep 1;
done
EOF

ping -c 5 -w 60 $(lava-target-ip)

lava_ssh_test_case() {
    set -x
    local test_case="${1}"
    shift
    lava-test-case \"${test_case}\" --shell \\
        ssh ${SSH_PTY_ARGS:--T} \\
        -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \\
        root@$(lava-target-ip) \"${@}\"
}""",
]


def _escape_env_vars(text: str, var_names: Iterable[str]) -> str:
    """Return *text* with a backslash inserted before each listed `$NAME`.

    Every occurrence of `$` immediately followed by one of *var_names* is
    prefixed with a single backslash. Longer names that merely start with a
    listed name (e.g. `$NAME_SUFFIX`) are prefixed as well. A truncated name
    is left untouched.
    """
    for var_name in var_names:
        # re.escape() keeps regex metacharacters in the name literal. There is
        # deliberately no trailing quantifier: one would make the last
        # character of the name optional and match unrelated variables.
        pattern = r"\$" + re.escape(var_name)
        text = re.sub(pattern, lambda match: "\\" + match.group(0), text)
    return text


def to_yaml_block(
    steps_array: Iterable[str], escape_vars: Iterable[str] = ()
) -> LiteralScalarString:
    """Join shell steps into a single YAML literal block scalar.

    Args:
        steps_array: Shell snippets. Entries that are empty or whitespace-only,
            and entries whose first character is "#", are dropped.
        escape_vars: Names of environment variables (without the leading "$")
            whose references must be prefixed with a backslash in the result.

    Returns:
        The remaining steps joined with newlines, wrapped in
        LiteralScalarString.
    """
    kept_steps = [s for s in steps_array if s.strip() and not s.startswith("#")]
    final_str = _escape_env_vars("\n".join(kept_steps), escape_vars)
    return LiteralScalarString(final_str)


def generate_dut_test(args: "LAVAJobSubmitter", first_stage_steps: list[str]) -> dict[str, Any]:
    """Build the inline test action that runs on the DUT.

    The single test definition runs *first_stage_steps* as one block and then
    dumps the exported environment to /dut-env-vars.sh. *args* is accepted but
    not read by this function.
    """
    # Commands executed on DUT.
    # Trying to execute the minimal number of commands, because the console data is
    # retrieved via UART, which is hang-prone in some devices.
    return {
        "namespace": "dut",
        "definitions": [
            {
                "from": "inline",
                "name": "setup-ssh-server",
                "path": "inline-setup-ssh-server",
                "repository": {
                    "metadata": {
                        "format": "Lava-Test Test Definition 1.0",
                        "name": "dut-env-export",
                    },
                    "run": {
                        "steps": [
                            to_yaml_block(first_stage_steps),
                            "export -p > /dut-env-vars.sh",  # Exporting the first boot environment
                        ],
                    },
                },
            }
        ],
    }


def generate_docker_test(
    args: "LAVAJobSubmitter", artifact_download_steps: list[str]
) -> dict[str, Any]:
    """Build the test action executed by the docker guest (the SSH client).

    Reads ``job_timeout_min``, ``ssh_client_image``, ``project_name`` and
    ``mesa_job_name`` from *args*. *artifact_download_steps* are embedded in a
    here-document that is run over SSH after sourcing /dut-env-vars.sh.
    """
    # Commands executed by the docker guest, which will be the SSH client.
    docker_commands = [
        to_yaml_block(DOCKER_COMMANDS, escape_vars=["LAVA_TARGET_IP"]),
        # Poll over SSH until the flag file /dut_ready exists on the DUT.
        "lava_ssh_test_case 'wait_for_dut_login' << EOF",
        "while [ ! -e /dut_ready ]; do sleep 1; done;",
        "EOF",
        to_yaml_block(
            (
                "lava_ssh_test_case 'artifact_download' 'bash --' << EOF",
                "source /dut-env-vars.sh",
                *artifact_download_steps,
                "EOF",
            )
        ),
        "export SSH_PTY_ARGS=-tt",
        # Putting CI_JOB name as the testcase name, it may help LAVA farm
        # maintainers with monitoring
        f"lava_ssh_test_case '{args.project_name}_{args.mesa_job_name}' "
        # Changing directory to /, as the HWCI_SCRIPT expects that
        "'\"cd / && /init-stage2.sh\"'",
    ]

    # LAVA test wrapping Mesa CI job in a SSH session.
    return {
        "namespace": "container",
        "timeout": {"minutes": args.job_timeout_min},
        "failure_retry": 3,
        "definitions": [
            {
                "name": "docker_ssh_client",
                "from": "inline",
                "path": "inline/docker_ssh_client.yaml",
                "repository": {
                    "metadata": {
                        "name": "mesa",
                        "description": "Mesa test plan",
                        "format": "Lava-Test Test Definition 1.0",
                    },
                    "run": {"steps": docker_commands},
                },
            }
        ],
        "docker": {
            "image": args.ssh_client_image,
        },
    }


def wrap_final_deploy_action(final_deploy_action: dict) -> None:
    """Update *final_deploy_action* in place with the "dut" namespace, the
    boot retry count and a 10-minute timeout."""
    final_deploy_action.update(
        {
            "namespace": "dut",
            "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
            "timeout": {"minutes": 10},
        }
    )


def wrap_boot_action(boot_action: dict) -> None:
    """Update *boot_action* in place with the "dut" namespace, the boot retry
    count and the SSH_SERVER_COMMANDS auto-login entries."""
    boot_action.update(
        {
            "namespace": "dut",
            "failure_retry": NUMBER_OF_ATTEMPTS_LAVA_BOOT,
            **SSH_SERVER_COMMANDS,
        }
    )