# Lint as: python2, python3
# Copyright (c) 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# Expects to be run in an environment with sudo and no interactive password
# prompt, such as within the Chromium OS development chroot.
8
9
10"""This is a base host class for servohost and labstation."""
11
12
13import six.moves.http_client
14import logging
15import socket
16import six.moves.xmlrpc_client
17import time
18import os
19
20from autotest_lib.client.bin import utils
21from autotest_lib.client.common_lib import autotest_enum
22from autotest_lib.client.common_lib import error
23from autotest_lib.client.common_lib import hosts
24from autotest_lib.client.common_lib import lsbrelease_utils
25from autotest_lib.client.common_lib.cros import dev_server
26from autotest_lib.client.common_lib.cros import kernel_utils
27from autotest_lib.client.cros import constants as client_constants
28from autotest_lib.server import autotest
29from autotest_lib.server import site_utils as server_utils
30from autotest_lib.server.cros import provisioner
31from autotest_lib.server.hosts import ssh_host
32from autotest_lib.site_utils.rpm_control_system import rpm_client
33
34
class BaseServoHost(ssh_host.SSHHost):
    """Base host class for a host that manages servo(s).

    E.g. beaglebone, labstation.
    """
    # Shell command handed to the parent class as `reboot_cmd` by reboot().
    REBOOT_CMD = 'sleep 5; reboot & sleep 10; reboot -f'

    TEMP_FILE_DIR = '/var/lib/servod/'

    LOCK_FILE_POSTFIX = '_in_use'
    REBOOT_FILE_POSTFIX = '_reboot'

    # Time to wait for a rebooting servohost, in seconds.
    REBOOT_TIMEOUT = 240

    # Timeout value to power cycle a servohost, in seconds.
    BOOT_TIMEOUT = 240

    # Constants that reflect the current host update state.
    UPDATE_STATE = autotest_enum.AutotestEnum('IDLE', 'RUNNING',
                                              'PENDING_REBOOT')
55
56    def _initialize(self, hostname, is_in_lab=None, *args, **dargs):
57        """Construct a BaseServoHost object.
58
59        @param is_in_lab: True if the servo host is in Cros Lab. Default is set
60                          to None, for which utils.host_is_in_lab_zone will be
61                          called to check if the servo host is in Cros lab.
62
63        """
64        super(BaseServoHost, self)._initialize(hostname=hostname,
65                                               *args, **dargs)
66        self._is_localhost = (self.hostname == 'localhost')
67        if self._is_localhost:
68            self._is_in_lab = False
69        elif is_in_lab is None:
70            self._is_in_lab = utils.host_is_in_lab_zone(self.hostname)
71        else:
72            self._is_in_lab = is_in_lab
73
74        # Commands on the servo host must be run by the superuser.
75        # Our account on a remote host is root, but if our target is
76        # localhost then we might be running unprivileged.  If so,
77        # `sudo` will have to be added to the commands.
78        if self._is_localhost:
79            self._sudo_required = utils.system_output('id -u') != '0'
80        else:
81            self._sudo_required = False
82
83        self._is_labstation = None
84        self._dut_host_info = None
85        self._dut_hostname = None
86
87
88    def get_board(self):
89        """Determine the board for this servo host. E.g. fizz-labstation
90
91        @returns a string representing this labstation's board or None if
92         target host is not using a ChromeOS image(e.g. test in chroot).
93        """
94        output = self.run('cat /etc/lsb-release', ignore_status=True).stdout
95        return lsbrelease_utils.get_current_board(lsb_release_content=output)
96
97
98    def set_dut_host_info(self, dut_host_info):
99        """
100        @param dut_host_info: A HostInfo object.
101        """
102        logging.info('setting dut_host_info field to (%s)', dut_host_info)
103        self._dut_host_info = dut_host_info
104
105
106    def get_dut_host_info(self):
107        """
108        @return A HostInfo object.
109        """
110        return self._dut_host_info
111
112
113    def set_dut_hostname(self, dut_hostname):
114        """
115        @param dut_hostname: hostname of the DUT that connected to this servo.
116        """
117        logging.info('setting dut_hostname as (%s)', dut_hostname)
118        self._dut_hostname = dut_hostname
119
120
121    def get_dut_hostname(self):
122        """
123        @returns hostname of the DUT that connected to this servo.
124        """
125        return self._dut_hostname
126
127
128    def is_labstation(self):
129        """Determine if the host is a labstation
130
131        @returns True if ths host is a labstation otherwise False.
132        """
133        if self._is_labstation is None:
134            board = self.get_board()
135            self._is_labstation = board is not None and 'labstation' in board
136
137        return self._is_labstation
138
139
140    def _get_lsb_release_content(self):
141        """Return the content of lsb-release file of host."""
142        return self.run(
143            'cat "%s"' % client_constants.LSB_RELEASE).stdout.strip()
144
145
146    def get_release_version(self):
147        """Get the value of attribute CHROMEOS_RELEASE_VERSION from lsb-release.
148
149        @returns The version string in lsb-release, under attribute
150                 CHROMEOS_RELEASE_VERSION(e.g. 12900.0.0). None on fail.
151        """
152        return lsbrelease_utils.get_chromeos_release_version(
153            lsb_release_content=self._get_lsb_release_content()
154        )
155
156
157    def get_full_release_path(self):
158        """Get full release path from servohost as string.
159
160        @returns full release path as a string
161                 (e.g. fizz-labstation-release/R82.12900.0.0). None on fail.
162        """
163        return lsbrelease_utils.get_chromeos_release_builder_path(
164            lsb_release_content=self._get_lsb_release_content()
165        )
166
167
168    def _check_update_status(self):
169        """ Check servohost's current update state.
170
171        @returns: one of below state of from self.UPDATE_STATE
172            IDLE -- if the target host is not currently updating and not
173                pending on a reboot.
174            RUNNING -- if there is another updating process that running on
175                target host(note: we don't expect to hit this scenario).
176            PENDING_REBOOT -- if the target host had an update and pending
177                on reboot.
178        """
179        result = self.run('pgrep -f quick-provision | grep -v $$',
180                          ignore_status=True)
181        # We don't expect any output unless there are another quick
182        # provision process is running.
183        if result.exit_status == 0:
184            return self.UPDATE_STATE.RUNNING
185
186        # Determine if we have an update that pending on reboot by check if
187        # the current inactive kernel has priority for the next boot.
188        try:
189            inactive_kernel = kernel_utils.get_kernel_state(self)[1]
190            next_kernel = kernel_utils.get_next_kernel(self)
191            if inactive_kernel == next_kernel:
192                return self.UPDATE_STATE.PENDING_REBOOT
193        except Exception as e:
194            logging.error('Unexpected error while checking kernel info; %s', e)
195        return self.UPDATE_STATE.IDLE
196
197
198    def is_in_lab(self):
199        """Check whether the servo host is a lab device.
200
201        @returns: True if the servo host is in Cros Lab, otherwise False.
202
203        """
204        return self._is_in_lab
205
206
207    def is_localhost(self):
208        """Checks whether the servo host points to localhost.
209
210        @returns: True if it points to localhost, otherwise False.
211
212        """
213        return self._is_localhost
214
215
216    def is_cros_host(self):
217        """Check if a servo host is running chromeos.
218
219        @return: True if the servo host is running chromeos.
220            False if it isn't, or we don't have enough information.
221        """
222        try:
223            result = self.run('grep -q CHROMEOS /etc/lsb-release',
224                              ignore_status=True, timeout=10)
225        except (error.AutoservRunError, error.AutoservSSHTimeout):
226            return False
227        return result.exit_status == 0
228
229
230    def prepare_for_update(self):
231        """Prepares the DUT for an update.
232        Subclasses may override this to perform any special actions
233        required before updating.
234        """
235        pass
236
237
238    def reboot(self, *args, **dargs):
239        """Reboot using special servo host reboot command."""
240        super(BaseServoHost, self).reboot(reboot_cmd=self.REBOOT_CMD,
241                                          *args, **dargs)
242
243
244    def update_image(self, stable_version=None):
245        """Update the image on the servo host, if needed.
246
247        This method recognizes the following cases:
248          * If the Host is not running Chrome OS, do nothing.
249          * If a previously triggered update is now complete, reboot
250            to the new version.
251          * If the host is processing an update do nothing.
252          * If the host has an update that pending on reboot, do nothing.
253          * If the host is running a version of Chrome OS different
254            from the default for servo Hosts, start an update.
255
256        @stable_version the target build number.(e.g. R82-12900.0.0)
257
258        @raises dev_server.DevServerException: If all the devservers are down.
259        @raises site_utils.ParseBuildNameException: If the devserver returns
260            an invalid build name.
261        """
262        # servod could be running in a Ubuntu workstation.
263        if not self.is_cros_host():
264            logging.info('Not attempting an update, either %s is not running '
265                         'chromeos or we cannot find enough information about '
266                         'the host.', self.hostname)
267            return
268
269        if lsbrelease_utils.is_moblab():
270            logging.info('Not attempting an update, %s is running moblab.',
271                         self.hostname)
272            return
273
274        if not stable_version:
275            logging.debug("BaseServoHost::update_image attempting to get"
276                          " servo cros stable version")
277            try:
278                stable_version = (self.get_dut_host_info().
279                                  servo_cros_stable_version)
280            except AttributeError:
281                logging.error("BaseServoHost::update_image failed to get"
282                              " servo cros stable version.")
283
284        target_build = "%s-release/%s" % (self.get_board(), stable_version)
285        target_build_number = server_utils.ParseBuildName(
286            target_build)[3]
287        current_build_number = self.get_release_version()
288
289        if current_build_number == target_build_number:
290            logging.info('servo host %s does not require an update.',
291                         self.hostname)
292            return
293
294        status = self._check_update_status()
295        if status == self.UPDATE_STATE.RUNNING:
296            logging.info('servo host %s already processing an update',
297                         self.hostname)
298            return
299        if status == self.UPDATE_STATE.PENDING_REBOOT:
300            # Labstation reboot is handled separately here as it require
301            # synchronized reboot among all managed DUTs. For servo_v3, we'll
302            # reboot when initialize Servohost, if there is a update pending.
303            logging.info('An update has been completed and pending reboot.')
304            return
305
306        ds = dev_server.ImageServer.resolve(self.hostname,
307                                            hostname=self.hostname)
308        url = ds.get_update_url(target_build)
309        cros_provisioner = provisioner.ChromiumOSProvisioner(update_url=url,
310                                                             host=self,
311                                                             is_servohost=True)
312        logging.info('Using devserver url: %s to trigger update on '
313                     'servo host %s, from %s to %s', url, self.hostname,
314                     current_build_number, target_build_number)
315        cros_provisioner.run_provision()
316
317
318    def has_power(self):
319        """Return whether or not the servo host is powered by PoE or RPM."""
320        # TODO(fdeng): See crbug.com/302791
321        # For now, assume all servo hosts in the lab have power.
322        return self.is_in_lab()
323
324
325    def _post_update_reboot(self):
326        """ Reboot servohost after an quick provision.
327
328        We need to do some specifal cleanup before and after reboot
329        when there is an update pending.
330        """
331        # Regarding the 'crossystem' command below: In some cases,
332        # the update flow puts the TPM into a state such that it
333        # fails verification.  We don't know why.  However, this
334        # call papers over the problem by clearing the TPM during
335        # the reboot.
336        #
337        # We ignore failures from 'crossystem'.  Although failure
338        # here is unexpected, and could signal a bug, the point of
339        # the exercise is to paper over problems; allowing this to
340        # fail would defeat the purpose.
341
342        # Preserve critical files before reboot since post-provision
343        # clobbering will wipe the stateful partition.
344        # TODO(xianuowang@) Remove this logic once we have updated to
345        # a image with https://crrev.com/c/2485908.
346        path_to_preserve = [
347                '/var/lib/servod',
348                '/var/lib/device_health_profile',
349        ]
350        safe_location = '/mnt/stateful_partition/unencrypted/preserve/'
351        for item in path_to_preserve:
352            dest = os.path.join(safe_location, item.split('/')[-1])
353            self.run('rm -rf %s' % dest, ignore_status=True)
354            self.run('mv %s %s' % (item, safe_location), ignore_status=True)
355
356        self.run('crossystem clear_tpm_owner_request=1', ignore_status=True)
357        self._servo_host_reboot()
358        logging.debug('Cleaning up autotest directories if exist.')
359        try:
360            installed_autodir = autotest.Autotest.get_installed_autodir(self)
361            self.run('rm -rf ' + installed_autodir)
362        except autotest.AutodirNotFoundError:
363            logging.debug('No autotest installed directory found.')
364
365        # Recover preserved files to original location.
366        # TODO(xianuowang@) Remove this logic once we have updated to
367        # a image with https://crrev.com/c/2485908.
368        for item in path_to_preserve:
369            src = os.path.join(safe_location, item.split('/')[-1])
370            dest = '/'.join(item.split('/')[:-1])
371            self.run('mv %s %s' % (src, dest), ignore_status=True)
372
373    def power_cycle(self):
374        """Cycle power to this host via PoE(servo v3) or RPM(labstation)
375        if it is a lab device.
376
377        @raises AutoservRepairError if it fails to power cycle the
378                servo host.
379
380        """
381        if self.has_power():
382            try:
383                rpm_client.set_power(self, 'CYCLE')
384            except (socket.error, six.moves.xmlrpc_client.Error,
385                    six.moves.http_client.BadStatusLine,
386                    rpm_client.RemotePowerException) as e:
387                raise hosts.AutoservRepairError(
388                    'Power cycling %s failed: %s' % (self.hostname, e),
389                    'power_cycle_via_rpm_failed'
390                )
391        else:
392            logging.info('Skipping power cycling, not a lab device.')
393
394
395    def _servo_host_reboot(self):
396        """Reboot this servo host because a reboot is requested."""
397        logging.info('Rebooting servo host %s from build %s', self.hostname,
398                     self.get_release_version())
399        # Tell the reboot() call not to wait for completion.
400        # Otherwise, the call will log reboot failure if servo does
401        # not come back.  The logged reboot failure will lead to
402        # test job failure.  If the test does not require servo, we
403        # don't want servo failure to fail the test with error:
404        # `Host did not return from reboot` in status.log.
405        self.reboot(fastsync=True, wait=False)
406
407        # We told the reboot() call not to wait, but we need to wait
408        # for the reboot before we continue.  Alas.  The code from
409        # here below is basically a copy of Host.wait_for_restart(),
410        # with the logging bits ripped out, so that they can't cause
411        # the failure logging problem described above.
412        #
413        # The black stain that this has left on my soul can never be
414        # erased.
415        old_boot_id = self.get_boot_id()
416        if not self.wait_down(timeout=self.WAIT_DOWN_REBOOT_TIMEOUT,
417                              warning_timer=self.WAIT_DOWN_REBOOT_WARNING,
418                              old_boot_id=old_boot_id):
419            raise error.AutoservHostError(
420                'servo host %s failed to shut down.' %
421                self.hostname)
422        if self.wait_up(timeout=self.REBOOT_TIMEOUT):
423            logging.info('servo host %s back from reboot, with build %s',
424                         self.hostname, self.get_release_version())
425        else:
426            raise error.AutoservHostError(
427                'servo host %s failed to come back from reboot.' %
428                self.hostname)
429
430
431    def make_ssh_command(self, user='root', port=22, opts='', hosts_file=None,
432        connect_timeout=None, alive_interval=None, alive_count_max=None,
433        connection_attempts=None):
434        """Override default make_ssh_command to use tuned options.
435
436        Tuning changes:
437          - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
438          connection failure. Consistency with remote_access.py.
439
440          - ServerAliveInterval=180; which causes SSH to ping connection every
441          180 seconds. In conjunction with ServerAliveCountMax ensures
442          that if the connection dies, Autotest will bail out quickly.
443
444          - ServerAliveCountMax=3; consistency with remote_access.py.
445
446          - ConnectAttempts=4; reduce flakiness in connection errors;
447          consistency with remote_access.py.
448
449          - UserKnownHostsFile=/dev/null; we don't care about the keys.
450
451          - SSH protocol forced to 2; needed for ServerAliveInterval.
452
453        @param user User name to use for the ssh connection.
454        @param port Port on the target host to use for ssh connection.
455        @param opts Additional options to the ssh command.
456        @param hosts_file Ignored.
457        @param connect_timeout Ignored.
458        @param alive_interval Ignored.
459        @param alive_count_max Ignored.
460        @param connection_attempts Ignored.
461
462        @returns: An ssh command with the requested settings.
463
464        """
465        options = ' '.join([opts, '-o Protocol=2'])
466        return super(BaseServoHost, self).make_ssh_command(
467            user=user, port=port, opts=options, hosts_file='/dev/null',
468            connect_timeout=30, alive_interval=180, alive_count_max=3,
469            connection_attempts=4)
470
471
472    def _make_scp_cmd(self, sources, dest):
473        """Format scp command.
474
475        Given a list of source paths and a destination path, produces the
476        appropriate scp command for encoding it. Remote paths must be
477        pre-encoded. Overrides _make_scp_cmd in AbstractSSHHost
478        to allow additional ssh options.
479
480        @param sources: A list of source paths to copy from.
481        @param dest: Destination path to copy to.
482
483        @returns: An scp command that copies |sources| on local machine to
484                  |dest| on the remote servo host.
485
486        """
487        command = ('scp -rq %s -o BatchMode=yes -o StrictHostKeyChecking=no '
488                   '-o UserKnownHostsFile=/dev/null -P %d %s "%s"')
489        return command % (self._master_ssh.ssh_option,
490                          self.port, sources, dest)
491
492
493    def run(self, command, timeout=3600, ignore_status=False,
494        stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
495        connect_timeout=30, ssh_failure_retry_ok=False,
496        options='', stdin=None, verbose=True, args=()):
497        """Run a command on the servo host.
498
499        Extends method `run` in SSHHost. If the servo host is a remote device,
500        it will call `run` in SSHost without changing anything.
501        If the servo host is 'localhost', it will call utils.system_output.
502
503        @param command: The command line string.
504        @param timeout: Time limit in seconds before attempting to
505                        kill the running process. The run() function
506                        will take a few seconds longer than 'timeout'
507                        to complete if it has to kill the process.
508        @param ignore_status: Do not raise an exception, no matter
509                              what the exit code of the command is.
510        @param stdout_tee/stderr_tee: Where to tee the stdout/stderr.
511        @param connect_timeout: SSH connection timeout (in seconds)
512                                Ignored if host is 'localhost'.
513        @param options: String with additional ssh command options
514                        Ignored if host is 'localhost'.
515        @param ssh_failure_retry_ok: when True and ssh connection failure is
516                                     suspected, OK to retry command (but not
517                                     compulsory, and likely not needed here)
518        @param stdin: Stdin to pass (a string) to the executed command.
519        @param verbose: Log the commands.
520        @param args: Sequence of strings to pass as arguments to command by
521                     quoting them in " and escaping their contents if necessary.
522
523        @returns: A utils.CmdResult object.
524
525        @raises AutoservRunError if the command failed.
526        @raises AutoservSSHTimeout SSH connection has timed out. Only applies
527                when servo host is not 'localhost'.
528
529        """
530        run_args = {
531            'command'             : command,
532            'timeout'             : timeout,
533            'ignore_status'       : ignore_status,
534            'stdout_tee'          : stdout_tee,
535            'stderr_tee'          : stderr_tee,
536            # connect_timeout     n/a for localhost
537            # options             n/a for localhost
538            # ssh_failure_retry_ok n/a for localhost
539            'stdin'               : stdin,
540            'verbose'             : verbose,
541            'args'                : args,
542        }
543        if self.is_localhost():
544            if self._sudo_required:
545                run_args['command'] = 'sudo -n sh -c "%s"' % utils.sh_escape(
546                    command)
547            try:
548                return utils.run(**run_args)
549            except error.CmdError as e:
550                logging.error(e)
551                raise error.AutoservRunError('command execution error',
552                                             e.result_obj)
553        else:
554            run_args['connect_timeout'] = connect_timeout
555            run_args['options'] = options
556            run_args['ssh_failure_retry_ok'] = ssh_failure_retry_ok
557            return super(BaseServoHost, self).run(**run_args)
558
559    def _mount_drive(self, src_path, dst_path):
560        """Mount an external drive on servohost.
561
562        @param: src_path  the drive path to mount(e.g. /dev/sda3).
563        @param: dst_path  the destination directory on servohost to mount
564                          the drive.
565
566        @returns: True if mount success otherwise False.
567        """
568        # Make sure the dst dir exists.
569        self.run('mkdir -p %s' % dst_path)
570
571        result = self.run('mount -o ro %s %s' % (src_path, dst_path),
572                          ignore_status=True)
573        return result.exit_status == 0
574
575    def _unmount_drive(self, mount_path):
576        """Unmount a drive from servohost.
577
578        @param: mount_path  the path on servohost to unmount.
579
580        @returns: True if unmount success otherwise False.
581        """
582        result = self.run('umount %s' % mount_path, ignore_status=True)
583        return result.exit_status == 0
584
585    def wait_ready(self, required_uptime=300):
586        """Wait ready for a servohost if it has been rebooted recently.
587
588        It may take a few minutes until all servos and their componments
589        re-enumerated and become ready after a servohost(especially labstation
590        as it supports multiple servos) reboot, so we need to make sure the
591        servohost has been up for a given a mount of time before trying to
592        start any actions.
593
594        @param required_uptime: Minimum uptime in seconds that we can
595                                consdier a servohost be ready.
596        """
597        uptime = float(self.check_uptime())
598        # To prevent unexpected output from check_uptime() that causes long
599        # sleep, make sure the maximum wait time <= required_uptime.
600        diff = min(required_uptime - uptime, required_uptime)
601        if diff > 0:
602            logging.info(
603                    'The servohost was just rebooted, wait %s'
604                    ' seconds for it to become ready', diff)
605            time.sleep(diff)
606