• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2019 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This is a base host class for servohost and labstation."""
10
11
12import httplib
13import logging
14import socket
15import traceback
16import xmlrpclib
17
18from autotest_lib.client.bin import utils
19from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import hosts
21from autotest_lib.client.common_lib import lsbrelease_utils
22from autotest_lib.client.common_lib.cros import dev_server
23from autotest_lib.client.cros import constants as client_constants
24from autotest_lib.server import afe_utils
25from autotest_lib.server import site_utils as server_utils
26from autotest_lib.server.cros import autoupdater
27from autotest_lib.server.hosts import ssh_host
28from autotest_lib.site_utils.rpm_control_system import rpm_client
29
30try:
31    from chromite.lib import metrics
32except ImportError:
33    metrics = utils.metrics_mock
34
35
class BaseServoHost(ssh_host.SSHHost):
    """Base host class for a host that manages servo(s).

    E.g. beaglebone, labstation.

    Commands are run over ssh when the servo host is remote.  When the
    hostname is 'localhost' they are run locally instead, wrapped in `sudo`
    if the current user is not root (see `run`).
    """
    # Shell command line handed to reboot(): wait 5 seconds, start `reboot`
    # in the background, and force `reboot -f` 10 seconds later.
    REBOOT_CMD = 'sleep 5; reboot & sleep 10; reboot -f'

    # NOTE(review): the next three constants are not referenced in this
    # class; presumably they are consumed by subclasses -- confirm.
    TEMP_FILE_DIR = '/var/lib/servod/'

    LOCK_FILE_POSTFIX = '_in_use'
    REBOOT_FILE_POSTFIX = '_reboot'

    # Time to wait for a rebooting servohost to come back up, in seconds.
    REBOOT_TIMEOUT = 240

    # Timeout value to power cycle a servohost, in seconds.
    BOOT_TIMEOUT = 240


    def _initialize(self, hostname, is_in_lab=None, *args, **dargs):
        """Construct a BaseServoHost object.

        @param hostname: Name of the servo host; 'localhost' selects local
                         command execution.
        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                          to None, for which utils.host_is_in_lab_zone will be
                          called to check if the servo host is in Cros lab.
                          Ignored for 'localhost', which is never treated as
                          a lab device.

        """
        super(BaseServoHost, self)._initialize(hostname=hostname,
                                               *args, **dargs)
        self._is_localhost = (self.hostname == 'localhost')
        if self._is_localhost:
            self._is_in_lab = False
            # Commands on the servo host must be run by the superuser.
            # Locally we might be running unprivileged; if so, `sudo`
            # will have to be added to the commands.
            self._sudo_required = utils.system_output('id -u') != '0'
        else:
            if is_in_lab is None:
                is_in_lab = utils.host_is_in_lab_zone(self.hostname)
            self._is_in_lab = is_in_lab
            # Our account on a remote host is root, so no `sudo` is needed.
            self._sudo_required = False

        # Lazily computed by is_labstation().
        self._is_labstation = None
        # Set through set_dut_host_info(); read by update_image().
        self._dut_host_info = None


    def get_board(self):
        """Determine the board for this servo host. E.g. fizz-labstation

        Reads /etc/lsb-release on the host (a failing `cat` is ignored) and
        lets lsbrelease_utils extract the board from its content.

        @returns a string representing this labstation's board or None if
         target host is not using a ChromeOS image(e.g. test in chroot).
        """
        output = self.run('cat /etc/lsb-release', ignore_status=True).stdout
        return lsbrelease_utils.get_current_board(lsb_release_content=output)


    def set_dut_host_info(self, hi):
        """Remember the host info of the DUT attached to this servo host.

        @param hi: Host info object; update_image() reads its
                   `servo_cros_stable_version` attribute.
        """
        logging.info('setting dut_host_info field to (%s)', hi)
        self._dut_host_info = hi


    def is_labstation(self):
        """Determine if the host is a labstation

        The answer is computed from the board name on first use and cached.

        @returns True if this host is a labstation otherwise False.
        """
        if self._is_labstation is None:
            board = self.get_board()
            self._is_labstation = board is not None and 'labstation' in board

        return self._is_labstation


    def _get_release_version(self):
        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.

        @returns The version string in lsb-release, under attribute
                 CHROMEOS_RELEASE_VERSION.
        """
        lsb_release_content = self.run(
            'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
        return lsbrelease_utils.get_chromeos_release_version(
            lsb_release_content=lsb_release_content)


    def _check_update_status(self):
        """Query the update engine status of this host.

        Uses a throw-away ChromiumOSUpdater with an empty update URL, since
        only its status query is needed here.

        @returns Whatever ChromiumOSUpdater.check_update_status() returns.
        """
        dummy_updater = autoupdater.ChromiumOSUpdater(update_url="", host=self)
        return dummy_updater.check_update_status()


    def is_in_lab(self):
        """Check whether the servo host is a lab device.

        @returns: True if the servo host is in Cros Lab, otherwise False.

        """
        return self._is_in_lab


    def is_localhost(self):
        """Checks whether the servo host points to localhost.

        @returns: True if it points to localhost, otherwise False.

        """
        return self._is_localhost


    def is_cros_host(self):
        """Check if a servo host is running chromeos.

        @return: True if the servo host is running chromeos.
            False if it isn't, or we don't have enough information
            (the probe command failed to run or the ssh connection timed
            out).
        """
        try:
            result = self.run('grep -q CHROMEOS /etc/lsb-release',
                              ignore_status=True, timeout=10)
        except (error.AutoservRunError, error.AutoservSSHTimeout):
            return False
        return result.exit_status == 0


    def reboot(self, *args, **dargs):
        """Reboot using special servo host reboot command.

        All arguments are forwarded to the parent class; REBOOT_CMD is
        always supplied as `reboot_cmd`, so callers must not pass their own.
        """
        super(BaseServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
                                          *args, **dargs)


    def update_image(self, wait_for_update=False):
        """Update the image on the servo host, if needed.

        This method recognizes the following cases:
          * If the Host is not running Chrome OS, do nothing.
          * If moblab is detected, do nothing.
          * If the host already runs the target version, do nothing.
          * If a previously triggered update is now complete, reboot
            to the new version (labstations are skipped here; their
            reboot is handled separately).
          * If the host is processing a previously triggered update,
            do nothing.
          * Otherwise stage the payload on a devserver and trigger an
            update, without waiting for it to complete.

        @param wait_for_update If true, run `update_engine_client --follow`
            before returning so that the call blocks until the update
            engine is done.  No reboot is issued after that wait.
        @raises dev_server.DevServerException: If all the devservers are down.
        @raises site_utils.ParseBuildNameException: If the devserver returns
            an invalid build name.
        @raises AutoservRunError: If the update_engine_client isn't present on
            the host, and the host is a cros_host.

        """
        # servod could be running in a Ubuntu workstation.
        if not self.is_cros_host():
            logging.info('Not attempting an update, either %s is not running '
                         'chromeos or we cannot find enough information about '
                         'the host.', self.hostname)
            return

        if lsbrelease_utils.is_moblab():
            logging.info('Not attempting an update, %s is running moblab.',
                         self.hostname)
            return

        # NOTE: we can't just use getattr because servo_cros_stable_version
        # is a property.
        servo_version_from_hi = None
        logging.debug('BaseServoHost::update_image attempted to get servo '
                      'cros stable version')
        if self._dut_host_info is None:
            # No host info was provided via set_dut_host_info(); that is the
            # default state, not an error worth a traceback.
            logging.info('BaseServoHost::update_image has no dut_host_info, '
                         'servo cros stable version not set')
        else:
            try:
                servo_version_from_hi = (
                    self._dut_host_info.servo_cros_stable_version)
            except Exception:
                # Best effort: fall through with None and let afe_utils
                # decide which build to use.
                logging.error('BaseServoHost::update_image failed to get '
                              'servo cros stable version (%s)',
                              traceback.format_exc())

        target_build = afe_utils.get_stable_servo_cros_image_name_v2(
            servo_version_from_hi=servo_version_from_hi,
            board=self.get_board(),
        )
        # NOTE(review): element 3 of the parsed build name is compared with
        # CHROMEOS_RELEASE_VERSION, so it is presumably the version/build
        # number -- confirm against ParseBuildName.
        target_build_number = server_utils.ParseBuildName(
            target_build)[3]
        current_build_number = self._get_release_version()

        if current_build_number == target_build_number:
            logging.info('servo host %s does not require an update.',
                         self.hostname)
            return

        status = self._check_update_status()
        if status in autoupdater.UPDATER_PROCESSING_UPDATE:
            logging.info('servo host %s already processing an update, update '
                         'engine client status=%s', self.hostname, status)
        elif status == autoupdater.UPDATER_NEED_REBOOT:
            logging.info('An update has been completed and pending reboot now.')
            # Labstation reboot is handled separately here as it require
            # synchronized reboot among all managed DUTs.
            if not self.is_labstation():
                self._servo_host_reboot()
        else:
            # For servo image staging, we want it as more widely distributed as
            # possible, so that devservers' load can be evenly distributed.
            # So use hostname instead of target_build as hash.
            ds = dev_server.ImageServer.resolve(self.hostname,
                                                hostname=self.hostname)
            url = ds.get_update_url(target_build)

            updater = autoupdater.ChromiumOSUpdater(update_url=url, host=self)

            logging.info('Using devserver url: %s to trigger update on '
                         'servo host %s, from %s to %s', url, self.hostname,
                         current_build_number, target_build_number)
            try:
                ds.stage_artifacts(target_build,
                                   artifacts=['full_payload'])
            except Exception as e:
                # Staging is best effort; a later call can try again.
                logging.error('Staging artifacts failed: %s', str(e))
                logging.error('Abandoning update for this cycle.')
            else:
                try:
                    updater.trigger_update()
                except autoupdater.RootFSUpdateError as e:
                    trigger_download_status = 'failed with %s' % str(e)
                    metrics.Counter('chromeos/autotest/servo/'
                                    'rootfs_update_failed').increment()
                else:
                    trigger_download_status = 'passed'
                logging.info('Triggered download and update %s for %s, '
                             'update engine currently in status %s',
                             trigger_download_status, self.hostname,
                             updater.check_update_status())

        if wait_for_update:
            logging.info('Waiting for servo update to complete.')
            self.run('update_engine_client --follow', ignore_status=True)


    def has_power(self):
        """Return whether or not the servo host is powered by PoE or RPM.

        Currently this is simply whether the host is a lab device.
        """
        # TODO(fdeng): See crbug.com/302791
        # For now, assume all servo hosts in the lab have power.
        return self.is_in_lab()


    def power_cycle(self):
        """Cycle power to this host via PoE(servo v3) or RPM(labstation)
        if it is a lab device.

        Hosts for which has_power() is false are skipped with a log message.

        @raises AutoservRepairError if it fails to power cycle the
                servo host.

        """
        if self.has_power():
            try:
                rpm_client.set_power(self, 'CYCLE')
            except (socket.error, xmlrpclib.Error,
                    httplib.BadStatusLine,
                    rpm_client.RemotePowerException) as e:
                raise hosts.AutoservRepairError(
                    'Power cycling %s failed: %s' % (self.hostname, e),
                    'power_cycle_via_rpm_failed'
                )
        else:
            logging.info('Skipping power cycling, not a lab device.')


    def _servo_host_reboot(self):
        """Reboot this servo host because a reboot is requested.

        Issues a non-waiting reboot and then waits for the host to go down
        and come back without using the parent's logged wait.

        @raises AutoservHostError if the host does not go down, or does not
                come back within REBOOT_TIMEOUT seconds.
        """
        logging.info('Rebooting servo host %s from build %s', self.hostname,
                     self._get_release_version())
        # Capture the boot id *before* asking for the reboot.  Reading it
        # afterwards races with the restart: we could talk to a host that is
        # already shutting down, or record the id of the new boot and then
        # wrongly conclude that the host never went down.
        old_boot_id = self.get_boot_id()

        # Tell the reboot() call not to wait for completion.
        # Otherwise, the call will log reboot failure if servo does
        # not come back.  The logged reboot failure will lead to
        # test job failure.  If the test does not require servo, we
        # don't want servo failure to fail the test with error:
        # `Host did not return from reboot` in status.log.
        self.reboot(fastsync=True, wait=False)

        # We told the reboot() call not to wait, but we need to wait
        # for the reboot before we continue.  The code from here below
        # is basically a copy of Host.wait_for_restart(), with the
        # logging bits ripped out, so that they can't cause the failure
        # logging problem described above.
        if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
                              warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
                              old_boot_id=old_boot_id):
            raise error.AutoservHostError(
                'servo host %s failed to shut down.' %
                self.hostname)
        if self.wait_up(timeout=self.REBOOT_TIMEOUT):
            logging.info('servo host %s back from reboot, with build %s',
                         self.hostname, self._get_release_version())
        else:
            raise error.AutoservHostError(
                'servo host %s failed to come back from reboot.' %
                self.hostname)


    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
        connect_timeout=None, alive_interval=None, alive_count_max=None,
        connection_attempts=None):
        """Override default make_ssh_command to use tuned options.

        Tuning changes:
          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
          connection failure. Consistency with remote_access.py.

          - ServerAliveInterval=180; which causes SSH to ping connection every
          180 seconds. In conjunction with ServerAliveCountMax ensures
          that if the connection dies, Autotest will bail out quickly.

          - ServerAliveCountMax=3; consistency with remote_access.py.

          - ConnectionAttempts=4; reduce flakiness in connection errors;
          consistency with remote_access.py.

          - UserKnownHostsFile=/dev/null; we don't care about the keys.

          - SSH protocol forced to 2; needed for ServerAliveInterval.

        @param user User name to use for the ssh connection.
        @param port Port on the target host to use for ssh connection.
        @param opts Additional options to the ssh command.
        @param hosts_file Ignored.
        @param connect_timeout Ignored.
        @param alive_interval Ignored.
        @param alive_count_max Ignored.
        @param connection_attempts Ignored.

        @returns: An ssh command with the requested settings.

        """
        options = ' '.join([opts, '-o Protocol=2'])
        return super(BaseServoHost, self).make_ssh_command(
            user=user, port=port, opts=options, hosts_file='/dev/null',
            connect_timeout=30, alive_interval=180, alive_count_max=3,
            connection_attempts=4)


    def _make_scp_cmd(self, sources, dest):
        """Format scp command.

        Given the source path(s) and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
        to allow additional ssh options.

        @param sources: Source path(s) to copy from.  The value is
                        interpolated into the command line verbatim with
                        `%s`, so it should already be a single,
                        space-separated, pre-encoded string; a list would
                        be rendered as its Python repr.
        @param dest: Destination path to copy to; it is wrapped in double
                     quotes in the resulting command.

        @returns: An scp command string that copies |sources| to |dest|.

        """
        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
        return command % (self._master_ssh.ssh_option,
                          self.port, sources, dest)


    def run(self, command, timeout=3600, ignore_status=False,
        stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
        connect_timeout=30, ssh_failure_retry_ok=False,
        options='', stdin=None, verbose=True, args=()):
        """Run a command on the servo host.

        Extends method `run` in SSHHost. If the servo host is a remote device,
        it will call `run` in SSHHost without changing anything.
        If the servo host is 'localhost', it will call utils.run, prefixing
        the command with `sudo -n sh -c` when the local user is not root.

        @param command: The command line string.
        @param timeout: Time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: Do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
        @param connect_timeout: SSH connection timeout (in seconds)
                                Ignored if host is 'localhost'.
        @param options: String with additional ssh command options
                        Ignored if host is 'localhost'.
        @param ssh_failure_retry_ok: when True and ssh connection failure is
                                     suspected, OK to retry command (but not
                                     compulsory, and likely not needed here).
                                     Accepted for interface compatibility;
                                     it is not forwarded by this method.
        @param stdin: Stdin to pass (a string) to the executed command.
        @param verbose: Log the commands.
        @param args: Sequence of strings to pass as arguments to command by
                     quoting them in " and escaping their contents if necessary.

        @returns: A utils.CmdResult object.

        @raises AutoservRunError if the command failed.
        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
                when servo host is not 'localhost'.

        """
        run_args = {'command': command, 'timeout': timeout,
                    'ignore_status': ignore_status, 'stdout_tee': stdout_tee,
                    'stderr_tee': stderr_tee, 'stdin': stdin,
                    'verbose': verbose, 'args': args}
        if self.is_localhost():
            if self._sudo_required:
                run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
                    command)
            try:
                return utils.run(**run_args)
            except error.CmdError as e:
                # Translate the local failure into the exception type that
                # callers of the ssh-based run() expect.
                logging.error(e)
                raise error.AutoservRunError('command execution error',
                                             e.result_obj)
        else:
            run_args['connect_timeout'] = connect_timeout
            run_args['options'] = options
            return super(BaseServoHost, self).run(**run_args)