• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Copyright 2023 The Chromium Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Helpers to reliably reboot the device via serial and fastboot.
6
7Note, this file will be executed in docker instance without vpython3, so we use
8python3 instead. The docker instance runs this file as a symbolic link of dmc
9via the "main" function.
10"""
11
import json
import logging
import os
import shutil
import signal
import subprocess
import sys
import time

from typing import List, Optional
from boot_device import BootMode
from compatible_utils import running_unattended
24
25# pylint: disable=too-many-return-statements, too-many-branches
26
27
def _env_ready() -> bool:
    """Reports whether the external tools used by this file can be found.

    The tools are looked up with shutil.which in a fixed order (fastboot, then
    serialio); the first one that cannot be located is reported through a
    warning and the remaining lookups are skipped.

    Returns:
        True if both tools are accessible, False otherwise.
    """
    required_tools = (
        ('fastboot', 'fastboot is not accessible'),
        ('serialio', 'serialio is not accessible'),
    )
    for tool, complaint in required_tools:
        if shutil.which(tool) is None:
            logging.warning(complaint)
            return False
    return True
38
39
def boot_device(node_id: str,
                serial_num: str,
                mode: BootMode,
                must_boot: bool = False) -> bool:
    """Drives the device into the requested boot mode via serial and fastboot.

    After the reboot request has been issued, the device state is polled for
    roughly 10 minutes before giving up.

    Args:
        node_id: The fuchsia node id of the device; when None, the value of
            the FUCHSIA_NODENAME environment variable is used.
        serial_num: The fastboot serial number of the device; when None, the
            value of the FUCHSIA_FASTBOOT_SERNUM environment variable is used.
        mode: Desired boot mode, either BootMode.REGULAR or
            BootMode.BOOTLOADER.
        must_boot: Forces a reboot even if the device already runs fuchsia.

    Returns:
        True if the device reached (or already was in) the desired state;
        False if a reboot command failed or the state was not reached in
        time.

    Raises:
        AssertionError: if an id is missing, the mode is unsupported, or
            fastboot / serialio is not accessible.
    """
    #TODO(crbug.com/40935296): Remove the default values once the use in
    # flash_device has been migrated.
    node_id = os.getenv('FUCHSIA_NODENAME') if node_id is None else node_id
    serial_num = (os.getenv('FUCHSIA_FASTBOOT_SERNUM')
                  if serial_num is None else serial_num)
    assert node_id is not None
    assert serial_num is not None

    assert mode in [BootMode.REGULAR, BootMode.BOOTLOADER
                    ], 'Unsupported BootMode %s for serial_boot_device.' % mode
    assert _env_ready()

    wants_fuchsia = mode == BootMode.REGULAR
    wants_bootloader = mode == BootMode.BOOTLOADER

    if is_in_fastboot(serial_num):
        # fastboot is stateless, so a device that is already there does not
        # need another reboot to get into fastboot.
        if wants_bootloader:
            return True
        left_fastboot = _run_fastboot(['reboot'], serial_num)
        if not left_fastboot:
            # A None result is not expected here unless the device was
            # rebooting; treating it as a failure is the safer choice.
            return False
    else:
        # The fuchsia check runs even without must_boot so that a broken
        # state is detected and recovered by a forced reboot.
        if not is_in_fuchsia(node_id):
            logging.error('Cannot find node id %s or fastboot serial number '
                          '%s, the os may run into panic, will try to use dm '
                          'to reboot it anyway.',
                          node_id, serial_num)
        elif wants_fuchsia and not must_boot:
            return True
        dm_verb = 'reboot' if wants_fuchsia else 'reboot-bootloader'
        # pylint: disable=subprocess-run-check
        sent = subprocess.run(['serialio', node_id, 'send', 'dm', dm_verb])
        if sent.returncode != 0:
            logging.error('Failed to send dm reboot[-bootloader] via serialio')
            return False

    # Poll until the device shows up in the requested state or time runs out.
    polling_since = time.time()
    while time.time() - polling_since < 600:
        assert mode in [BootMode.REGULAR, BootMode.BOOTLOADER]
        if wants_fuchsia:
            if is_in_fuchsia(node_id):
                return True
        elif wants_bootloader and is_in_fastboot(serial_num):
            return True
    logging.error(
        'Failed to transite node id %s or fastboot serial number %s '
        'to expected state %s', node_id, serial_num, mode)
    return False
110
111
def _serialio_send_and_wait(node_id: str, command: List[str],
                            waitfor: str) -> bool:
    """Continuously sends the command to the device and waits for the waitfor
    string via serialio.

    A `serialio <node_id> wait <waitfor>` process is started in the
    background, then `serialio <node_id> send <command...>` is issued every
    ~2 seconds until the waiter exits or ~28 seconds have elapsed.

    Args:
        node_id: The fuchsia node id passed to serialio.
        command: The words of the command to send to the device.
        waitfor: The string the background waiter is asked to wait for.

    Returns:
        True if the waiter exited with code 0; False if sending failed, the
        waiter exited with a non-zero code, or the time budget ran out.

    Raises:
        AssertionError: if serialio is not accessible.
    """
    assert shutil.which('serialio') is not None
    # The send command line never changes between attempts; build it once.
    send_command = ['serialio', node_id, 'send', *command]
    start_sec = time.time()
    with subprocess.Popen(['serialio', node_id, 'wait', waitfor],
                          stdout=subprocess.DEVNULL,
                          stderr=subprocess.DEVNULL) as proc:
        try:
            while time.time() - start_sec < 28:
                # pylint: disable=subprocess-run-check
                if subprocess.run(send_command).returncode != 0:
                    logging.error('Failed to send %s via serialio to %s',
                                  command, node_id)
                    return False
                result = proc.poll()
                if result is not None:
                    if result == 0:
                        return True
                    logging.error(
                        'Failed to wait %s via serial to %s, '
                        'return code %s', waitfor, node_id, result)
                    return False
                time.sleep(2)
        finally:
            # Leaving the `with` block waits for the waiter process. Kill it
            # on every exit path where it is still running (send failure,
            # timeout, or an exception); otherwise that wait could block for
            # as long as the waiter keeps running.
            if proc.poll() is None:
                proc.kill()
    logging.error('Have not found %s via serialio to %s', waitfor, node_id)
    return False
143
144
def is_in_fuchsia(node_id: str) -> bool:
    """Tells whether the device answers as a running fuchsia over serial.

    The device is asked to pipe a fixed text through sha1sum and the serial
    output is watched for the expected digest. A second probe looks for sshd
    in the output of ps; its failure only produces a warning and does not
    change the result, so a True return does not guarantee a workable network
    or ssh connection.

    Each probe asserts the existence of serialio and is bounded at ~30
    seconds, so this function waits at most ~60 seconds.

    Args:
        node_id: The fuchsia node id of the device.

    Returns:
        True if the echo probe succeeded, False otherwise.
    """
    echo_probe = ['echo', 'yes-i-am-healthy', '|', 'sha1sum']
    expected_digest = '89d517b7db104aada669a83bc3c3a906e00671f7'
    echoed = _serialio_send_and_wait(node_id, echo_probe, expected_digest)
    if not echoed:
        logging.error(
            'Device %s did not respond echo, '
            'it may not be running fuchsia', node_id)
        return False

    sshd_seen = _serialio_send_and_wait(node_id, ['ps'], 'sshd')
    if not sshd_seen:
        logging.warning(
            'Cannot find sshd from ps on %s, the ssh '
            'connection may not be available.', node_id)
    return True
163
164
def is_in_fastboot(serial_num: str) -> bool:
    """Tells whether the device currently answers fastboot commands.

    `fastboot getvar product` is retried every ~2 seconds for ~28 seconds.
    Note, fastboot may be impacted by usb congestion, which can make this
    function return False for a device that is actually in fastboot.

    Args:
        serial_num: The fastboot serial number of the device.

    Returns:
        True as soon as one query succeeds; False if a query reports that
        fastboot is still waiting for the device (a None result), or if no
        query succeeded within the time budget.
    """
    began_at = time.time()
    while time.time() - began_at < 28:
        # A fresh argument list is passed on every attempt.
        outcome = _run_fastboot(['getvar', 'product'], serial_num)
        if outcome:
            return True
        if outcome is None:
            # fastboot is waiting for the device, i.e. it is not in fastboot.
            return False
        time.sleep(2)
    logging.error('Failed to wait for fastboot state of %s', serial_num)
    return False
181
182
def _run_fastboot(args: List[str], serial_num: str) -> Optional[bool]:
    """Executes the fastboot command and kills the hanging process.

    The fastboot may be impacted by the usb congestion and causes the process
    to hang forever. So this command waits for 30 seconds before killing the
    process, and it's not good for flashing.

    Args:
        args: The fastboot arguments, e.g. ['getvar', 'product']. The list is
            not modified.
        serial_num: The fastboot serial number, appended as `-s <serial_num>`.

    Returns:
        True if fastboot exited with code 0; False if it exited with a
        non-zero code or timed out for an unrecognized reason; None if it
        timed out while its stderr mentions the serial number, i.e. fastboot
        was still waiting for the device, so the device is not in fastboot.

    Raises:
        AssertionError: if fastboot is not accessible.
    """
    assert shutil.which('fastboot') is not None
    # Build a new command line rather than editing the caller's list in
    # place, so the same list can safely be reused across calls.
    command = ['fastboot', *args, '-s', serial_num]
    try:
        # Capture output to ensure we can get '< waiting for serial-num >'
        # output.
        # pylint: disable=subprocess-run-check
        if subprocess.run(command, capture_output=True,
                          timeout=30).returncode == 0:
            return True
    except subprocess.TimeoutExpired as timeout:
        # Decode leniently: partial output of a killed process is not
        # guaranteed to be valid UTF-8.
        if timeout.stderr is not None and serial_num.lower(
        ) in timeout.stderr.decode(errors='replace').lower():
            logging.warning('fastboot is still waiting for %s', serial_num)
            return None
    logging.error('Failed to run %s against fastboot %s', command, serial_num)
    return False
208
209
def _shutdown_if_serial_is_unavailable(node_id: str) -> None:
    """Terminates pid 1 when the serial of the device cannot be polled.

    Nothing happens unless running_unattended() is true. Otherwise
    `serialio <node_id> poll` is executed, and a non-zero exit code leads to
    SIGTERM being sent to pid 1.

    Args:
        node_id: The fuchsia node id passed to serialio.
    """
    if running_unattended():
        # pylint: disable=subprocess-run-check
        poll_status = subprocess.run(['serialio', node_id, 'poll']).returncode
        if poll_status != 0:
            logging.warning('shutting down the docker by killing the pid 1')
            # Flush all the logs before the process gets killed together
            # with the root process.
            logging.shutdown()
            # In docker instance, killing root process will cause the
            # instance to be shut down and restarted by swarm_docker. So the
            # updated tty can be attached to the new docker instance.
            os.kill(1, signal.SIGTERM)
223
224
def main(action: str) -> int:
    """Main entry of serial_boot_device.

    The device is identified by the FUCHSIA_NODENAME and
    FUCHSIA_FASTBOOT_SERNUM environment variables. Logging goes to the stream
    handler and, if /home/swarming/ exists, also to a per-node log file.

    Args:
        action: The name of the action to perform, e.g. 'health-check',
            'reboot', 'after-task', 'reboot-fastboot', 'is-in-fuchsia',
            'is-in-fastboot', 'server-version', 'before-task' or
            'set-power-state'.

    Returns:
        0 on success, 1 if the action failed, 2 if the environment is not
        ready or the action is unknown.

    Raises:
        AssertionError: if either environment variable is not set.
    """
    node_id = os.getenv('FUCHSIA_NODENAME')
    serial_num = os.getenv('FUCHSIA_FASTBOOT_SERNUM')
    assert node_id is not None
    assert serial_num is not None

    log_sinks = [logging.StreamHandler()]
    if os.path.isdir('/home/swarming/'):
        log_file = logging.FileHandler('/home/swarming/dmc.%s.log' % node_id)
        log_sinks.append(log_file)
    logging.basicConfig(format='%(levelname)s %(asctime)s %(message)s',
                        handlers=log_sinks,
                        level=logging.INFO)
    logging.info('Running command %s against %s %s', sys.argv, node_id,
                 serial_num)

    # The environment is checked only after the logging is configured so
    # that the failure gets recorded.
    if not _env_ready():
        logging.error('Missing environment setup, unable to perform action.')
        return 2

    if action == 'health-check':
        _shutdown_if_serial_is_unavailable(node_id)
        if not (is_in_fuchsia(node_id) or is_in_fastboot(serial_num)):
            logging.error(
                'Cannot find node id %s or fastboot serial number %s',
                node_id, serial_num)
            return 1
        # The json result bypasses logging to avoid any potential formatting
        # issue.
        report = [{
            'nodename': node_id,
            'state': 'healthy',
            'status_message': '',
            'dms_state': ''
        }]
        print(json.dumps(report))
        return 0

    if action in ('reboot', 'after-task'):
        if action == 'after-task':
            _shutdown_if_serial_is_unavailable(node_id)
        rebooted = boot_device(node_id,
                               serial_num,
                               BootMode.REGULAR,
                               must_boot=True)
        if rebooted:
            return 0
        logging.error(
            'Cannot reboot the device with node id %s and fastboot '
            'serial number %s', node_id, serial_num)
        return 1

    if action == 'reboot-fastboot':
        in_bootloader = boot_device(node_id,
                                    serial_num,
                                    BootMode.BOOTLOADER,
                                    must_boot=True)
        if in_bootloader:
            return 0
        logging.error(
            'Cannot reboot the device with node id %s and fastboot '
            'serial number %s into fastboot', node_id, serial_num)
        return 1

    if action == 'is-in-fuchsia':
        if not is_in_fuchsia(node_id):
            logging.error('Cannot find node id %s', node_id)
            return 1
        return 0

    if action == 'is-in-fastboot':
        if not is_in_fastboot(serial_num):
            logging.error('Cannot find fastboot serial number %s', serial_num)
            return 1
        return 0

    if action == 'server-version':
        # TODO(crbug.com/40935296): Implement the server-version.
        print('chromium')
        return 0

    if action == 'before-task':
        # TODO(crbug.com/40935296): fuchsia.py requires IMAGE_MANIFEST_PATH and
        # BOOTSERVER_PATH to support before-task call. So the following
        # statement does not work as it should be.
        _shutdown_if_serial_is_unavailable(node_id)
        return 0

    if action == 'set-power-state':
        # Do nothing. The device is always restarted during after-task.
        return 0

    logging.error('Unknown command %s', action)
    return 2
307
308
if __name__ == '__main__':
    # The first command-line argument names the action; the value returned by
    # main becomes the exit status of the process.
    requested_action = sys.argv[1]
    sys.exit(main(requested_action))
311