#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Helpers to reliably reboot the device via serial and fastboot.

Note, this file will be executed in docker instance without vpython3, so we use
python3 instead. The docker instance runs this file as a symbolic link of dmc
via the "main" function.
"""

import json
import logging
import os
import shutil
import signal
import subprocess
import sys
import time

from typing import List, Optional
from boot_device import BootMode
from compatible_utils import running_unattended

# pylint: disable=too-many-return-statements, too-many-branches


def _env_ready() -> bool:
    """Checks if the required environment is ready to support the functions in
    this file.

    Returns:
        True if both the fastboot and serialio executables can be found on
        PATH; otherwise a warning naming the first missing tool is logged and
        False is returned.
    """
    if shutil.which('fastboot') is None:
        logging.warning('fastboot is not accessible')
        return False
    if shutil.which('serialio') is None:
        logging.warning('serialio is not accessible')
        return False
    return True


def boot_device(node_id: Optional[str],
                serial_num: Optional[str],
                mode: BootMode,
                must_boot: bool = False) -> bool:
    """Boots device into desired mode via serial and fastboot.
    This function waits for at most 10 minutes for the transition.

    Args:
        node_id: The fuchsia node id of the device. If None, the value of the
            FUCHSIA_NODENAME environment variable is used.
        serial_num: The fastboot serial number of the device. If None, the
            value of the FUCHSIA_FASTBOOT_SERNUM environment variable is used.
        mode: Desired boot mode; only BootMode.REGULAR and BootMode.BOOTLOADER
            are supported.
        must_boot: Forces a device that is running fuchsia to reboot even if it
            is already in the regular mode. A device that is already in
            fastboot is never rebooted when BootMode.BOOTLOADER is requested,
            since fastboot is stateless.

    Returns:
        a boolean value to indicate if the operation succeeded; missing
        dependencies like serialio (for serial access) and fastboot, or the
        device cannot be found may also introduce the error.
    """
    #TODO(crbug.com/40935296): Remove the default values once the use in
    # flash_device has been migrated.
    if node_id is None:
        node_id = os.getenv('FUCHSIA_NODENAME')
    if serial_num is None:
        serial_num = os.getenv('FUCHSIA_FASTBOOT_SERNUM')
    assert node_id is not None
    assert serial_num is not None

    assert mode in [BootMode.REGULAR, BootMode.BOOTLOADER
                    ], 'Unsupported BootMode %s for serial_boot_device.' % mode
    assert _env_ready()

    if is_in_fastboot(serial_num):
        # fastboot is stateless and there isn't a reason to reboot the device
        # again to go to the fastboot.
        if mode == BootMode.BOOTLOADER:
            return True
        if not _run_fastboot(['reboot'], serial_num):
            # Shouldn't return None here, unless the device was rebooting. In
            # the case, it would be safer to return false.
            return False
    else:
        # Even not must_boot, still check if the device is running fuchsia to
        # detect the broken state and force a reboot to recover it.
        if is_in_fuchsia(node_id):
            if not must_boot and mode == BootMode.REGULAR:
                return True
        else:
            logging.error('Cannot find node id %s or fastboot serial number '
                          '%s, the os may run into panic, will try to use dm '
                          'to reboot it anyway.',
                          node_id, serial_num)
        # pylint: disable=subprocess-run-check
        if subprocess.run([
                'serialio', node_id, 'send', 'dm', 'reboot' +
            ('' if mode == BootMode.REGULAR else '-bootloader')
        ]).returncode != 0:
            logging.error('Failed to send dm reboot[-bootloader] via serialio')
            return False

    start_sec = time.time()
    while time.time() - start_sec < 600:
        assert mode in [BootMode.REGULAR, BootMode.BOOTLOADER]
        if mode == BootMode.REGULAR and is_in_fuchsia(node_id):
            return True
        if mode == BootMode.BOOTLOADER and is_in_fastboot(serial_num):
            return True
        # The probes above usually block for a while, but they can also fail
        # immediately (e.g. serialio refuses to send). Pause briefly so this
        # loop never degrades into a tight spin that floods the log and the
        # serial line for ten minutes.
        time.sleep(2)
    logging.error(
        'Failed to transite node id %s or fastboot serial number %s '
        'to expected state %s', node_id, serial_num, mode)
    return False


def _serialio_send_and_wait(node_id: str, command: List[str],
                            waitfor: str) -> bool:
    """Continuously sends the command to the device and waits for the waitfor
    string via serialio.
    This function asserts the existence of serialio and waits at most ~30
    seconds.

    Args:
        node_id: The fuchsia node id of the device.
        command: The command and its arguments to send over serial; it is
            re-sent every ~2 seconds until the waitfor string shows up.
        waitfor: The string passed to "serialio wait".

    Returns:
        True if "serialio wait" exited with 0; False if sending the command
        failed, "serialio wait" exited with a non-zero code, or the waitfor
        string was not seen in time.
    """
    assert shutil.which('serialio') is not None
    # The command is the same for every retry, build it once.
    send_command = ['serialio', node_id, 'send', *command]
    start_sec = time.time()
    with subprocess.Popen(['serialio', node_id, 'wait', waitfor],
                          stdout=subprocess.DEVNULL,
                          stderr=subprocess.DEVNULL) as proc:
        try:
            while time.time() - start_sec < 28:
                # pylint: disable=subprocess-run-check
                if subprocess.run(send_command).returncode != 0:
                    logging.error('Failed to send %s via serialio to %s',
                                  command, node_id)
                    return False
                result = proc.poll()
                if result is not None:
                    if result == 0:
                        return True
                    logging.error(
                        'Failed to wait %s via serial to %s, '
                        'return code %s', waitfor, node_id, result)
                    return False
                time.sleep(2)
        finally:
            # Leaving the with-block waits for the child process. Kill a still
            # running "serialio wait" on every exit path (timeout as well as
            # the early returns above), otherwise that wait may never finish.
            if proc.poll() is None:
                proc.kill()
    logging.error('Have not found %s via serialio to %s', waitfor, node_id)
    return False


def is_in_fuchsia(node_id: str) -> bool:
    """Checks if the device is running in fuchsia through serial.
    Note, this check goes through serial and does not guarantee the fuchsia os
    has a workable network or ssh connection.
    This function asserts the existence of serialio and waits at most ~60
    seconds.

    Args:
        node_id: The fuchsia node id of the device.

    Returns:
        True if the device echoed back the expected sha1sum over serial. A
        missing sshd in the ps output only produces a warning and does not
        change the result.
    """
    # The expected string is the checksum printed by the device, so it cannot
    # be matched by the echo of the command line itself.
    if not _serialio_send_and_wait(
            node_id, ['echo', 'yes-i-am-healthy', '|', 'sha1sum'],
            '89d517b7db104aada669a83bc3c3a906e00671f7'):
        logging.error(
            'Device %s did not respond echo, '
            'it may not be running fuchsia', node_id)
        return False
    if not _serialio_send_and_wait(node_id, ['ps'], 'sshd'):
        logging.warning(
            'Cannot find sshd from ps on %s, the ssh '
            'connection may not be available.', node_id)
    return True


def is_in_fastboot(serial_num: str) -> bool:
    """Checks if the device is running in fastboot through fastboot command.
    Note, the fastboot may be impacted by the usb congestion and causes this
    function to return false.
    This function asserts the existence of fastboot and retries for ~30
    seconds; since a single fastboot invocation may itself take up to 30
    seconds before being killed, the worst case can exceed that window.

    Args:
        serial_num: The fastboot serial number of the device.

    Returns:
        True as soon as "fastboot getvar product" succeeds; False if fastboot
        reports it is still waiting for the device, or if no attempt succeeded
        within the retry window.
    """
    start_sec = time.time()
    while time.time() - start_sec < 28:
        result = _run_fastboot(['getvar', 'product'], serial_num)
        # None means fastboot was still waiting for the device, i.e. the device
        # is not in fastboot; retrying would not help.
        if result is None:
            return False
        if result:
            return True
        time.sleep(2)
    logging.error('Failed to wait for fastboot state of %s', serial_num)
    return False


def _run_fastboot(args: List[str], serial_num: str) -> Optional[bool]:
    """Executes the fastboot command and kills the hanging process.
    The fastboot may be impacted by the usb congestion and causes the process to
    hang forever. So this command waits for 30 seconds before killing the
    process, and it's not good for flashing.
    Note, if this function detects the fastboot is waiting for the device, i.e.
    the device is not in the fastboot, it returns None instead, e.g. unknown.
    This function asserts the existence of fastboot.

    Args:
        args: The fastboot sub-command and its arguments, without the leading
            'fastboot' and without the '-s' option. The list is not modified.
        serial_num: The fastboot serial number of the device.

    Returns:
        True if fastboot exited with 0; None if fastboot timed out while its
        stderr mentioned the serial number (i.e. it was waiting for the
        device); False otherwise.
    """
    assert shutil.which('fastboot') is not None
    # Build a new list instead of mutating the one owned by the caller.
    command = ['fastboot', *args, '-s', serial_num]
    try:
        # Capture output to ensure we can get '< waiting for serial-num >'
        # output.
        # pylint: disable=subprocess-run-check
        if subprocess.run(command, capture_output=True,
                          timeout=30).returncode == 0:
            return True
    except subprocess.TimeoutExpired as timeout:
        # Decode leniently; only the presence of the serial number matters.
        if timeout.stderr is not None and serial_num.lower(
        ) in timeout.stderr.decode(errors='replace').lower():
            logging.warning('fastboot is still waiting for %s', serial_num)
            return None
    logging.error('Failed to run %s against fastboot %s', command, serial_num)
    return False


def _shutdown_if_serial_is_unavailable(node_id: str) -> None:
    """Terminates pid 1 if the serial connection of the device is unavailable.
    This is a no-op unless running_unattended() returns true.

    Args:
        node_id: The fuchsia node id of the device, used by "serialio poll".
    """
    if not running_unattended():
        return
    # pylint: disable=subprocess-run-check
    if subprocess.run(['serialio', node_id, 'poll']).returncode != 0:
        logging.warning('shutting down the docker by killing the pid 1')
        # Before killing the process itself, force shutting down the logging to
        # flush everything.
        logging.shutdown()
        # In docker instance, killing root process will cause the instance to be
        # shut down and restarted by swarm_docker. So the updated tty can be
        # attached to the new docker instance.
        os.kill(1, signal.SIGTERM)


def main(action: str) -> int:
    """Main entry of serial_boot_device.

    The node id and the fastboot serial number are read from the
    FUCHSIA_NODENAME and FUCHSIA_FASTBOOT_SERNUM environment variables; both
    must be set.

    Args:
        action: One of 'health-check', 'reboot', 'after-task',
            'reboot-fastboot', 'is-in-fuchsia', 'is-in-fastboot',
            'server-version', 'before-task' or 'set-power-state'.

    Returns:
        The process exit code: 0 if the action succeeded, 1 if it failed, 2 if
        the environment is not ready or the action is unknown.
    """
    node_id = os.getenv('FUCHSIA_NODENAME')
    serial_num = os.getenv('FUCHSIA_FASTBOOT_SERNUM')
    assert node_id is not None
    assert serial_num is not None

    handlers = [logging.StreamHandler()]
    # Also log into a per-device file when the swarming home directory exists.
    if os.path.isdir('/home/swarming/'):
        handlers.append(
            logging.FileHandler('/home/swarming/dmc.%s.log' % node_id))
    logging.basicConfig(format='%(levelname)s %(asctime)s %(message)s',
                        handlers=handlers,
                        level=logging.INFO)
    logging.info('Running command %s against %s %s', sys.argv, node_id,
                 serial_num)

    # Checks the environment after initializing the logging.
    if not _env_ready():
        logging.error('Missing environment setup, unable to perform action.')
        return 2

    if action == 'health-check':
        _shutdown_if_serial_is_unavailable(node_id)
        if is_in_fuchsia(node_id) or is_in_fastboot(serial_num):
            # Print out the json result without using logging to avoid any
            # potential formatting issue.
            print(
                json.dumps([{
                    'nodename': node_id,
                    'state': 'healthy',
                    'status_message': '',
                    'dms_state': ''
                }]))
            return 0
        logging.error('Cannot find node id %s or fastboot serial number %s',
                      node_id, serial_num)
        return 1
    if action in ['reboot', 'after-task']:
        if action == 'after-task':
            _shutdown_if_serial_is_unavailable(node_id)
        if boot_device(node_id, serial_num, BootMode.REGULAR, must_boot=True):
            return 0
        logging.error(
            'Cannot reboot the device with node id %s and fastboot '
            'serial number %s', node_id, serial_num)
        return 1
    if action == 'reboot-fastboot':
        if boot_device(node_id,
                       serial_num,
                       BootMode.BOOTLOADER,
                       must_boot=True):
            return 0
        logging.error(
            'Cannot reboot the device with node id %s and fastboot '
            'serial number %s into fastboot', node_id, serial_num)
        return 1
    if action == 'is-in-fuchsia':
        if is_in_fuchsia(node_id):
            return 0
        logging.error('Cannot find node id %s', node_id)
        return 1
    if action == 'is-in-fastboot':
        if is_in_fastboot(serial_num):
            return 0
        logging.error('Cannot find fastboot serial number %s', serial_num)
        return 1
    if action == 'server-version':
        # TODO(crbug.com/40935296): Implement the server-version.
        print('chromium')
        return 0
    if action == 'before-task':
        # TODO(crbug.com/40935296): fuchsia.py requires IMAGE_MANIFEST_PATH and
        # BOOTSERVER_PATH to support before-task call. So the following
        # statement does not work as it should be.
        _shutdown_if_serial_is_unavailable(node_id)
        return 0
    if action == 'set-power-state':
        # Do nothing. The device is always restarted during after-task.
        return 0
    logging.error('Unknown command %s', action)
    return 2


if __name__ == '__main__':
    sys.exit(main(sys.argv[1]))