# Copyright (c) 2008 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import os, time, socket, shutil, glob, logging, tempfile, re
import shlex
import subprocess

from autotest_lib.client.bin.result_tools import runner as result_tools_runner
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.client.common_lib.global_config import global_config
from autotest_lib.server import utils, autotest
from autotest_lib.server.hosts import host_info
from autotest_lib.server.hosts import remote
from autotest_lib.server.hosts import rpc_server_tracker
from autotest_lib.server.hosts import ssh_multiplex

# pylint: disable=C0111

# Shorthand for reading entries from the global autotest configuration.
get_value = global_config.get_config_value
# Whether a shared ("master") ssh connection should be established and
# reused for ssh-based operations (AUTOSERV section of the config).
enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh', type=bool,
                              default=False)

# Number of seconds to use the cached up status.
_DEFAULT_UP_STATUS_EXPIRATION_SECONDS = 300
# Standard TCP port used by sshd.
_DEFAULT_SSH_PORT = 22

# Number of seconds to wait for the host to shut down in wait_down().
_DEFAULT_WAIT_DOWN_TIME_SECONDS = 120

# Number of seconds to wait for the host to boot up in wait_up().
_DEFAULT_WAIT_UP_TIME_SECONDS = 120

# Timeout in seconds for a single call of get_boot_id() in wait_down()
# and a single ssh ping in wait_up().
_DEFAULT_MAX_PING_TIMEOUT = 10

class AbstractSSHHost(remote.RemoteHost):
    """
    This class represents a generic implementation of most of the
    framework necessary for controlling a host via ssh. It implements
    almost all of the abstract Host methods, except for the core
    Host.run method.
    """
    # Version-label prefix; empty here, overridden by subclasses.
    VERSION_PREFIX = ''
    # Timeout for master ssh connection setup, in seconds.
    DEFAULT_START_MASTER_SSH_TIMEOUT_S = 5

50    def _initialize(self, hostname, user="root", port=_DEFAULT_SSH_PORT,
51                    password="", is_client_install_supported=True,
52                    afe_host=None, host_info_store=None, connection_pool=None,
53                    *args, **dargs):
54        super(AbstractSSHHost, self)._initialize(hostname=hostname,
55                                                 *args, **dargs)
56        """
57        @param hostname: The hostname of the host.
58        @param user: The username to use when ssh'ing into the host.
59        @param password: The password to use when ssh'ing into the host.
60        @param port: The port to use for ssh.
61        @param is_client_install_supported: Boolean to indicate if we can
62                install autotest on the host.
63        @param afe_host: The host object attained from the AFE (get_hosts).
64        @param host_info_store: Optional host_info.CachingHostInfoStore object
65                to obtain / update host information.
66        @param connection_pool: ssh_multiplex.ConnectionPool instance to share
67                the master ssh connection across control scripts.
68        """
69        # IP address is retrieved only on demand. Otherwise the host
70        # initialization will fail for host is not online.
71        self._ip = None
72        self.user = user
73        self.port = port
74        self.password = password
75        self._is_client_install_supported = is_client_install_supported
76        self._use_rsync = None
77        self.known_hosts_file = tempfile.mkstemp()[1]
78        self._rpc_server_tracker = rpc_server_tracker.RpcServerTracker(self);
79
80        """
81        Master SSH connection background job, socket temp directory and socket
82        control path option. If master-SSH is enabled, these fields will be
83        initialized by start_master_ssh when a new SSH connection is initiated.
84        """
85        self._connection_pool = connection_pool
86        if connection_pool:
87            self._master_ssh = connection_pool.get(hostname, user, port)
88        else:
89            self._master_ssh = ssh_multiplex.MasterSsh(hostname, user, port)
90
91        self._afe_host = afe_host or utils.EmptyAFEHost()
92        self.host_info_store = (host_info_store or
93                                host_info.InMemoryHostInfoStore())
94
95        # The cached status of whether the DUT responded to ping.
96        self._cached_up_status = None
97        # The timestamp when the value of _cached_up_status is set.
98        self._cached_up_status_updated = None
99
100
101    @property
102    def ip(self):
103        """@return IP address of the host.
104        """
105        if not self._ip:
106            self._ip = socket.getaddrinfo(self.hostname, None)[0][4][0]
107        return self._ip
108
109
110    @property
111    def is_client_install_supported(self):
112        """"
113        Returns True if the host supports autotest client installs, False
114        otherwise.
115        """
116        return self._is_client_install_supported
117
118
119    @property
120    def rpc_server_tracker(self):
121        """"
122        @return The RPC server tracker associated with this host.
123        """
124        return self._rpc_server_tracker
125
126
127    @property
128    def is_default_port(self):
129      """Returns True if its port is default SSH port."""
130      return self.port == _DEFAULT_SSH_PORT
131
132    @property
133    def host_port(self):
134        """Returns hostname if port is default. Otherwise, hostname:port.
135        """
136        if self.is_default_port:
137            return self.hostname
138        else:
139            return '%s:%d' % (self.hostname, self.port)
140
141
142    # Though it doesn't use self here, it is not declared as staticmethod
143    # because its subclass may use self to access member variables.
144    def make_ssh_command(self, user="root", port=_DEFAULT_SSH_PORT, opts='',
145                         hosts_file='/dev/null', connect_timeout=30,
146                         alive_interval=300, alive_count_max=3,
147                         connection_attempts=1):
148        ssh_options = " ".join([
149            opts,
150            self.make_ssh_options(
151                hosts_file=hosts_file, connect_timeout=connect_timeout,
152                alive_interval=alive_interval, alive_count_max=alive_count_max,
153                connection_attempts=connection_attempts)])
154        return "/usr/bin/ssh -a -x %s -l %s -p %d" % (ssh_options, user, port)
155
156
157    @staticmethod
158    def make_ssh_options(hosts_file='/dev/null', connect_timeout=30,
159                         alive_interval=300, alive_count_max=3,
160                         connection_attempts=1):
161        """Composes SSH -o options."""
162        assert isinstance(connect_timeout, (int, long))
163        assert connect_timeout > 0 # can't disable the timeout
164
165        options = [("StrictHostKeyChecking", "no"),
166                   ("UserKnownHostsFile", hosts_file),
167                   ("BatchMode", "yes"),
168                   ("ConnectTimeout", str(connect_timeout)),
169                   ("ServerAliveInterval", str(alive_interval)),
170                   ("ServerAliveCountMax", str(alive_count_max)),
171                   ("ConnectionAttempts", str(connection_attempts))]
172        return " ".join("-o %s=%s" % kv for kv in options)
173
174
175    def use_rsync(self):
176        if self._use_rsync is not None:
177            return self._use_rsync
178
179        # Check if rsync is available on the remote host. If it's not,
180        # don't try to use it for any future file transfers.
181        self._use_rsync = self.check_rsync()
182        if not self._use_rsync:
183            logging.warning("rsync not available on remote host %s -- disabled",
184                            self.host_port)
185        return self._use_rsync
186
187
188    def check_rsync(self):
189        """
190        Check if rsync is available on the remote host.
191        """
192        try:
193            self.run("rsync --version", stdout_tee=None, stderr_tee=None)
194        except error.AutoservRunError:
195            return False
196        return True
197
198
199    def _encode_remote_paths(self, paths, escape=True, use_scp=False):
200        """
201        Given a list of file paths, encodes it as a single remote path, in
202        the style used by rsync and scp.
203        escape: add \\ to protect special characters.
204        use_scp: encode for scp if true, rsync if false.
205        """
206        if escape:
207            paths = [utils.scp_remote_escape(path) for path in paths]
208
209        remote = self.hostname
210
211        # rsync and scp require IPv6 brackets, even when there isn't any
212        # trailing port number (ssh doesn't support IPv6 brackets).
213        # In the Python >= 3.3 future, 'import ipaddress' will parse addresses.
214        if re.search(r':.*:', remote):
215            remote = '[%s]' % remote
216
217        if use_scp:
218            return '%s@%s:"%s"' % (self.user, remote, " ".join(paths))
219        else:
220            return '%s@%s:%s' % (
221                    self.user, remote,
222                    " :".join('"%s"' % p for p in paths))
223
224    def _encode_local_paths(self, paths, escape=True):
225        """
226        Given a list of file paths, encodes it as a single local path.
227        escape: add \\ to protect special characters.
228        """
229        if escape:
230            paths = [utils.sh_escape(path) for path in paths]
231
232        return " ".join('"%s"' % p for p in paths)
233
234
235    def rsync_options(self, delete_dest=False, preserve_symlinks=False,
236                      safe_symlinks=False, excludes=None):
237        """Obtains rsync options for the remote."""
238        ssh_cmd = self.make_ssh_command(user=self.user, port=self.port,
239                                        opts=self._master_ssh.ssh_option,
240                                        hosts_file=self.known_hosts_file)
241        if delete_dest:
242            delete_flag = "--delete"
243        else:
244            delete_flag = ""
245        if safe_symlinks:
246            symlink_flag = "-l --safe-links"
247        elif preserve_symlinks:
248            symlink_flag = "-l"
249        else:
250            symlink_flag = "-L"
251        exclude_args = ''
252        if excludes:
253            exclude_args = ' '.join(
254                    ["--exclude '%s'" % exclude for exclude in excludes])
255        return "%s %s --timeout=1800 --rsh='%s' -az --no-o --no-g %s" % (
256            symlink_flag, delete_flag, ssh_cmd, exclude_args)
257
258
259    def _make_rsync_cmd(self, sources, dest, delete_dest,
260                        preserve_symlinks, safe_symlinks, excludes=None):
261        """
262        Given a string of source paths and a destination path, produces the
263        appropriate rsync command for copying them. Remote paths must be
264        pre-encoded.
265        """
266        rsync_options = self.rsync_options(
267            delete_dest=delete_dest, preserve_symlinks=preserve_symlinks,
268            safe_symlinks=safe_symlinks, excludes=excludes)
269        return 'rsync %s %s "%s"' % (rsync_options, sources, dest)
270
271
272    def _make_ssh_cmd(self, cmd):
273        """
274        Create a base ssh command string for the host which can be used
275        to run commands directly on the machine
276        """
277        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
278                                         opts=self._master_ssh.ssh_option,
279                                         hosts_file=self.known_hosts_file)
280
281        return '%s %s "%s"' % (base_cmd, self.hostname, utils.sh_escape(cmd))
282
283    def _make_scp_cmd(self, sources, dest):
284        """
285        Given a string of source paths and a destination path, produces the
286        appropriate scp command for encoding it. Remote paths must be
287        pre-encoded.
288        """
289        command = ("scp -rq %s -o StrictHostKeyChecking=no "
290                   "-o UserKnownHostsFile=%s -P %d %s '%s'")
291        return command % (self._master_ssh.ssh_option, self.known_hosts_file,
292                          self.port, sources, dest)
293
294
295    def _make_rsync_compatible_globs(self, path, is_local):
296        """
297        Given an rsync-style path, returns a list of globbed paths
298        that will hopefully provide equivalent behaviour for scp. Does not
299        support the full range of rsync pattern matching behaviour, only that
300        exposed in the get/send_file interface (trailing slashes).
301
302        The is_local param is flag indicating if the paths should be
303        interpreted as local or remote paths.
304        """
305
306        # non-trailing slash paths should just work
307        if len(path) == 0 or path[-1] != "/":
308            return [path]
309
310        # make a function to test if a pattern matches any files
311        if is_local:
312            def glob_matches_files(path, pattern):
313                return len(glob.glob(path + pattern)) > 0
314        else:
315            def glob_matches_files(path, pattern):
316                result = self.run("ls \"%s\"%s" % (utils.sh_escape(path),
317                                                   pattern),
318                                  stdout_tee=None, ignore_status=True)
319                return result.exit_status == 0
320
321        # take a set of globs that cover all files, and see which are needed
322        patterns = ["*", ".[!.]*"]
323        patterns = [p for p in patterns if glob_matches_files(path, p)]
324
325        # convert them into a set of paths suitable for the commandline
326        if is_local:
327            return ["\"%s\"%s" % (utils.sh_escape(path), pattern)
328                    for pattern in patterns]
329        else:
330            return [utils.scp_remote_escape(path) + pattern
331                    for pattern in patterns]
332
333
334    def _make_rsync_compatible_source(self, source, is_local):
335        """
336        Applies the same logic as _make_rsync_compatible_globs, but
337        applies it to an entire list of sources, producing a new list of
338        sources, properly quoted.
339        """
340        return sum((self._make_rsync_compatible_globs(path, is_local)
341                    for path in source), [])
342
343
344    def _set_umask_perms(self, dest):
345        """
346        Given a destination file/dir (recursively) set the permissions on
347        all the files and directories to the max allowed by running umask.
348        """
349
350        # now this looks strange but I haven't found a way in Python to _just_
351        # get the umask, apparently the only option is to try to set it
352        umask = os.umask(0)
353        os.umask(umask)
354
355        max_privs = 0777 & ~umask
356
357        def set_file_privs(filename):
358            """Sets mode of |filename|.  Assumes |filename| exists."""
359            file_stat = os.stat(filename)
360
361            file_privs = max_privs
362            # if the original file permissions do not have at least one
363            # executable bit then do not set it anywhere
364            if not file_stat.st_mode & 0111:
365                file_privs &= ~0111
366
367            os.chmod(filename, file_privs)
368
369        # try a bottom-up walk so changes on directory permissions won't cut
370        # our access to the files/directories inside it
371        for root, dirs, files in os.walk(dest, topdown=False):
372            # when setting the privileges we emulate the chmod "X" behaviour
373            # that sets to execute only if it is a directory or any of the
374            # owner/group/other already has execute right
375            for dirname in dirs:
376                os.chmod(os.path.join(root, dirname), max_privs)
377
378            # Filter out broken symlinks as we go.
379            for filename in filter(os.path.exists, files):
380                set_file_privs(os.path.join(root, filename))
381
382
383        # now set privs for the dest itself
384        if os.path.isdir(dest):
385            os.chmod(dest, max_privs)
386        else:
387            set_file_privs(dest)
388
389
390    def get_file(self, source, dest, delete_dest=False, preserve_perm=True,
391                 preserve_symlinks=False, retry=True, safe_symlinks=False,
392                 try_rsync=True):
393        """
394        Copy files from the remote host to a local path.
395
396        Directories will be copied recursively.
397        If a source component is a directory with a trailing slash,
398        the content of the directory will be copied, otherwise, the
399        directory itself and its content will be copied. This
400        behavior is similar to that of the program 'rsync'.
401
402        Args:
403                source: either
404                        1) a single file or directory, as a string
405                        2) a list of one or more (possibly mixed)
406                                files or directories
407                dest: a file or a directory (if source contains a
408                        directory or more than one element, you must
409                        supply a directory dest)
410                delete_dest: if this is true, the command will also clear
411                             out any old files at dest that are not in the
412                             source
413                preserve_perm: tells get_file() to try to preserve the sources
414                               permissions on files and dirs
415                preserve_symlinks: try to preserve symlinks instead of
416                                   transforming them into files/dirs on copy
417                safe_symlinks: same as preserve_symlinks, but discard links
418                               that may point outside the copied tree
419                try_rsync: set to False to skip directly to using scp
420        Raises:
421                AutoservRunError: the scp command failed
422        """
423        logging.debug('get_file. source: %s, dest: %s, delete_dest: %s,'
424                      'preserve_perm: %s, preserve_symlinks:%s', source, dest,
425                      delete_dest, preserve_perm, preserve_symlinks)
426
427        # Start a master SSH connection if necessary.
428        self.start_master_ssh()
429
430        if isinstance(source, basestring):
431            source = [source]
432        dest = os.path.abspath(dest)
433
434        # If rsync is disabled or fails, try scp.
435        try_scp = True
436        if try_rsync and self.use_rsync():
437            logging.debug('Using Rsync.')
438            try:
439                remote_source = self._encode_remote_paths(source)
440                local_dest = utils.sh_escape(dest)
441                rsync = self._make_rsync_cmd(remote_source, local_dest,
442                                             delete_dest, preserve_symlinks,
443                                             safe_symlinks)
444                utils.run(rsync)
445                try_scp = False
446            except error.CmdError, e:
447                # retry on rsync exit values which may be caused by transient
448                # network problems:
449                #
450                # rc 10: Error in socket I/O
451                # rc 12: Error in rsync protocol data stream
452                # rc 23: Partial transfer due to error
453                # rc 255: Ssh error
454                #
455                # Note that rc 23 includes dangling symlinks.  In this case
456                # retrying is useless, but not very damaging since rsync checks
457                # for those before starting the transfer (scp does not).
458                status = e.result_obj.exit_status
459                if status in [10, 12, 23, 255] and retry:
460                    logging.warning('rsync status %d, retrying', status)
461                    self.get_file(source, dest, delete_dest, preserve_perm,
462                                  preserve_symlinks, retry=False)
463                    # The nested get_file() does all that's needed.
464                    return
465                else:
466                    logging.warning("trying scp, rsync failed: %s (%d)",
467                                     e, status)
468
469        if try_scp:
470            logging.debug('Trying scp.')
471            # scp has no equivalent to --delete, just drop the entire dest dir
472            if delete_dest and os.path.isdir(dest):
473                shutil.rmtree(dest)
474                os.mkdir(dest)
475
476            remote_source = self._make_rsync_compatible_source(source, False)
477            if remote_source:
478                # _make_rsync_compatible_source() already did the escaping
479                remote_source = self._encode_remote_paths(
480                        remote_source, escape=False, use_scp=True)
481                local_dest = utils.sh_escape(dest)
482                scp = self._make_scp_cmd(remote_source, local_dest)
483                try:
484                    utils.run(scp)
485                except error.CmdError, e:
486                    logging.debug('scp failed: %s', e)
487                    raise error.AutoservRunError(e.args[0], e.args[1])
488
489        if not preserve_perm:
490            # we have no way to tell scp to not try to preserve the
491            # permissions so set them after copy instead.
492            # for rsync we could use "--no-p --chmod=ugo=rwX" but those
493            # options are only in very recent rsync versions
494            self._set_umask_perms(dest)
495
496
    def send_file(self, source, dest, delete_dest=False,
                  preserve_symlinks=False, excludes=None):
        """
        Copy files from a local path to the remote host.

        Directories will be copied recursively.
        If a source component is a directory with a trailing slash,
        the content of the directory will be copied, otherwise, the
        directory itself and its content will be copied. This
        behavior is similar to that of the program 'rsync'.

        Args:
                source: either
                        1) a single file or directory, as a string
                        2) a list of one or more (possibly mixed)
                                files or directories
                dest: a file or a directory (if source contains a
                        directory or more than one element, you must
                        supply a directory dest)
                delete_dest: if this is true, the command will also clear
                             out any old files at dest that are not in the
                             source
                preserve_symlinks: controls if symlinks on the source will be
                    copied as such on the destination or transformed into the
                    referenced file/directory
                excludes: A list of file pattern that matches files not to be
                          sent. `send_file` will fail if exclude is set, since
                          local copy does not support --exclude, e.g., when
                          using scp to copy file.

        Raises:
                AutoservRunError: the scp command failed
        """
        logging.debug('send_file. source: %s, dest: %s, delete_dest: %s,'
                      'preserve_symlinks:%s', source, dest,
                      delete_dest, preserve_symlinks)
        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        # Accept either a single path or a list of paths.
        if isinstance(source, basestring):
            source = [source]

        local_sources = self._encode_local_paths(source)
        if not local_sources:
            raise error.TestError('source |%s| yielded an empty string' % (
                source))
        # A NUL byte would let a crafted path smuggle extra shell commands.
        if local_sources.find('\x00') != -1:
            raise error.TestError('one or more sources include NUL char')

        # If rsync is disabled or fails, try scp.
        try_scp = True
        if self.use_rsync():
            logging.debug('Using Rsync.')
            remote_dest = self._encode_remote_paths([dest])
            try:
                rsync = self._make_rsync_cmd(local_sources, remote_dest,
                                             delete_dest, preserve_symlinks,
                                             False, excludes=excludes)
                utils.run(rsync)
                try_scp = False
            except error.CmdError, e:
                # Fall through to the scp path below on any rsync failure.
                logging.warning("trying scp, rsync failed: %s", e)

        if try_scp:
            logging.debug('Trying scp.')
            if excludes:
                raise error.AutotestHostRunError(
                        '--exclude is not supported in scp, try to use rsync. '
                        'excludes: %s' % ','.join(excludes), None)
            # scp has no equivalent to --delete, just drop the entire dest dir
            # NOTE(review): |dest| is interpolated unescaped into these shell
            # commands; this assumes callers pass shell-safe remote paths —
            # confirm before passing untrusted input.
            if delete_dest:
                is_dir = self.run("ls -d %s/" % dest,
                                  ignore_status=True).exit_status == 0
                if is_dir:
                    cmd = "rm -rf %s && mkdir %s"
                    cmd %= (dest, dest)
                    self.run(cmd)

            remote_dest = self._encode_remote_paths([dest], use_scp=True)
            local_sources = self._make_rsync_compatible_source(source, True)
            if local_sources:
                sources = self._encode_local_paths(local_sources, escape=False)
                scp = self._make_scp_cmd(sources, remote_dest)
                try:
                    utils.run(scp)
                except error.CmdError, e:
                    logging.debug('scp failed: %s', e)
                    raise error.AutoservRunError(e.args[0], e.args[1])
            else:
                # Trailing-slash sources that matched nothing expand to an
                # empty list; nothing to copy in that case.
                logging.debug('skipping scp for empty source list')

589    def verify_ssh_user_access(self):
590        """Verify ssh access to this host.
591
592        @returns False if ssh_ping fails due to Permissions error, True
593                 otherwise.
594        """
595        try:
596            self.ssh_ping()
597        except (error.AutoservSshPermissionDeniedError,
598                error.AutoservSshPingHostError):
599            return False
600        return True
601
602
603    def ssh_ping(self, timeout=60, connect_timeout=None, base_cmd='true'):
604        """
605        Pings remote host via ssh.
606
607        @param timeout: Command execution timeout in seconds.
608                        Defaults to 60 seconds.
609        @param connect_timeout: ssh connection timeout in seconds.
610        @param base_cmd: The base command to run with the ssh ping.
611                         Defaults to true.
612        @raise AutoservSSHTimeout: If the ssh ping times out.
613        @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
614                                                 permissions.
615        @raise AutoservSshPingHostError: For other AutoservRunErrors.
616        """
617        ctimeout = min(timeout, connect_timeout or timeout)
618        try:
619            self.run(base_cmd, timeout=timeout, connect_timeout=ctimeout,
620                     ssh_failure_retry_ok=True)
621        except error.AutoservSSHTimeout:
622            msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
623            raise error.AutoservSSHTimeout(msg)
624        except error.AutoservSshPermissionDeniedError:
625            #let AutoservSshPermissionDeniedError be visible to the callers
626            raise
627        except error.AutoservRunError, e:
628            # convert the generic AutoservRunError into something more
629            # specific for this context
630            raise error.AutoservSshPingHostError(e.description + '\n' +
631                                                 repr(e.result_obj))
632
633
634    def is_up(self, timeout=60, connect_timeout=None, base_cmd='true'):
635        """
636        Check if the remote host is up by ssh-ing and running a base command.
637
638        @param timeout: command execution timeout in seconds.
639        @param connect_timeout: ssh connection timeout in seconds.
640        @param base_cmd: a base command to run with ssh. The default is 'true'.
641        @returns True if the remote host is up before the timeout expires,
642                 False otherwise.
643        """
644        try:
645            self.ssh_ping(timeout=timeout,
646                          connect_timeout=connect_timeout,
647                          base_cmd=base_cmd)
648        except error.AutoservError:
649            return False
650        else:
651            return True
652
653
654    def is_up_fast(self):
655        """Return True if the host can be pinged."""
656        ping_config = ping_runner.PingConfig(
657                self.hostname, count=3, ignore_result=True, ignore_status=True)
658        return ping_runner.PingRunner().ping(ping_config).received > 0
659
660
    def wait_up(self, timeout=_DEFAULT_WAIT_UP_TIME_SECONDS):
        """
        Wait until the remote host is up or the timeout expires.

        In fact, it will wait until an ssh connection to the remote
        host can be established, and getty is running.

        @param timeout time limit in seconds before returning even
            if the host is not up.

        @returns True if the host was found to be up before the timeout expires,
                 False otherwise
        """
        current_time = int(time.time())
        end_time = current_time + timeout

        autoserv_error_logged = False
        while current_time < end_time:
            # Each ssh probe gets at most _DEFAULT_MAX_PING_TIMEOUT seconds,
            # clamped to whatever is left of the overall deadline.
            ping_timeout = min(_DEFAULT_MAX_PING_TIMEOUT,
                               end_time - current_time)
            if self.is_up(timeout=ping_timeout, connect_timeout=ping_timeout):
                try:
                    if self.are_wait_up_processes_up():
                        logging.debug('Host %s is now up', self.host_port)
                        return True
                except error.AutoservError as e:
                    # Log this class of failure only once per wait to avoid
                    # flooding the log while the host is still booting.
                    if not autoserv_error_logged:
                        logging.debug('Ignoring failure to reach %s: %s %s',
                                      self.host_port, e,
                                      '(and further similar failures)')
                        autoserv_error_logged = True
            time.sleep(1)
            current_time = int(time.time())

        # timeout + time.time() - end_time = the actual elapsed wait.
        logging.debug('Host %s is still down after waiting %d seconds',
                      self.host_port, int(timeout + time.time() - end_time))
        return False

    def wait_down(self, timeout=_DEFAULT_WAIT_DOWN_TIME_SECONDS,
                  warning_timer=None, old_boot_id=None,
                  max_ping_timeout=_DEFAULT_MAX_PING_TIMEOUT):
        """
        Wait until the remote host is down or the timeout expires.

        If old_boot_id is provided, waits until either the machine is
        unpingable or self.get_boot_id() returns a value different from
        old_boot_id. If the boot_id value has changed then the function
        returns True under the assumption that the machine has shut down
        and has now already come back up.

        If old_boot_id is None then until the machine becomes unreachable the
        method assumes the machine has not yet shut down.

        @param timeout Time limit in seconds before returning even if the host
            is still up.
        @param warning_timer Time limit in seconds that will generate a warning
            if the host is not down yet. Can be None for no warning.
        @param old_boot_id A string containing the result of self.get_boot_id()
            prior to the host being told to shut down. Can be None if this is
            not available.
        @param max_ping_timeout Maximum timeout in seconds for each
            self.get_boot_id() call. If this timeout is hit, it is assumed that
            the host went down and became unreachable.

        @returns True if the host was found to be down (max_ping_timeout timeout
            expired or boot_id changed if provided) and False if timeout
            expired.
        """
        #TODO: there is currently no way to distinguish between knowing
        #TODO: boot_id was unsupported and not knowing the boot_id.
        current_time = int(time.time())
        end_time = current_time + timeout

        if warning_timer:
            # Absolute deadline after which a one-shot warning is recorded.
            warn_time = current_time + warning_timer

        if old_boot_id is not None:
            logging.debug('Host %s pre-shutdown boot_id is %s',
                          self.host_port, old_boot_id)

        # Impose semi real-time deadline constraints, since some clients
        # (eg: watchdog timer tests) expect strict checking of time elapsed.
        # Each iteration of this loop is treated as though it atomically
        # completes within current_time, this is needed because if we used
        # inline time.time() calls instead then the following could happen:
        #
        # while time.time() < end_time:                     [23 < 30]
        #    some code.                                     [takes 10 secs]
        #    try:
        #        new_boot_id = self.get_boot_id(timeout=end_time - time.time())
        #                                                   [30 - 33]
        # The last step will lead to a return True, when in fact the machine
        # went down at 32 seconds (>30). Hence we need to pass get_boot_id
        # the same time that allowed us into that iteration of the loop.
        while current_time < end_time:
            # Never let a single get_boot_id() attempt outlive either the
            # overall deadline or the per-attempt max_ping_timeout cap.
            ping_timeout = min(end_time - current_time, max_ping_timeout)
            try:
                new_boot_id = self.get_boot_id(timeout=ping_timeout)
            except error.AutoservError:
                # Failure to reach the host over ssh is interpreted as the
                # host having gone down (see docstring for the caveat that
                # a transient ssh failure is indistinguishable here).
                logging.debug('Host %s is now unreachable over ssh, is down',
                              self.host_port)
                return True
            else:
                # if the machine is up but the boot_id value has changed from
                # old boot id, then we can assume the machine has gone down
                # and then already come back up
                if old_boot_id is not None and old_boot_id != new_boot_id:
                    logging.debug('Host %s now has boot_id %s and so must '
                                  'have rebooted', self.host_port, new_boot_id)
                    return True

            if warning_timer and current_time > warn_time:
                self.record("INFO", None, "shutdown",
                            "Shutdown took longer than %ds" % warning_timer)
                # Print the warning only once.
                warning_timer = None
                # If a machine is stuck switching runlevels
                # This may cause the machine to reboot.
                self.run('kill -HUP 1', ignore_status=True)

            time.sleep(1)
            current_time = int(time.time())

        return False
786
787
    # Tunable constants for the verify & repair code.
    # Minimum free disk space (in gigabytes) required in the autotest
    # install directory, read from the [SERVER] section of the global
    # config; defaults to 20 GB when unset.
    AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER",
                                               "gb_diskspace_required",
                                               type=float,
                                               default=20.0)
793
794
795    def verify_connectivity(self):
796        super(AbstractSSHHost, self).verify_connectivity()
797
798        logging.info('Pinging host ' + self.host_port)
799        self.ssh_ping()
800        logging.info("Host (ssh) %s is alive", self.host_port)
801
802        if self.is_shutting_down():
803            raise error.AutoservHostIsShuttingDownError("Host is shutting down")
804
805
806    def verify_software(self):
807        super(AbstractSSHHost, self).verify_software()
808        try:
809            self.check_diskspace(autotest.Autotest.get_install_dir(self),
810                                 self.AUTOTEST_GB_DISKSPACE_REQUIRED)
811        except error.AutoservDiskFullHostError:
812            # only want to raise if it's a space issue
813            raise
814        except (error.AutoservHostError, autotest.AutodirNotFoundError):
815            logging.exception('autodir space check exception, this is probably '
816                             'safe to ignore\n')
817
818
819    def close(self):
820        super(AbstractSSHHost, self).close()
821        self.rpc_server_tracker.disconnect_all()
822        if not self._connection_pool:
823            self._master_ssh.close()
824        if os.path.exists(self.known_hosts_file):
825            os.remove(self.known_hosts_file)
826
827
828    def restart_master_ssh(self):
829        """
830        Stop and restart the ssh master connection.  This is meant as a last
831        resort when ssh commands fail and we don't understand why.
832        """
833        logging.debug('Restarting master ssh connection')
834        self._master_ssh.close()
835        self._master_ssh.maybe_start(timeout=30)
836
837
838
839    def start_master_ssh(self, timeout=DEFAULT_START_MASTER_SSH_TIMEOUT_S):
840        """
841        Called whenever a slave SSH connection needs to be initiated (e.g., by
842        run, rsync, scp). If master SSH support is enabled and a master SSH
843        connection is not active already, start a new one in the background.
844        Also, cleanup any zombie master SSH connections (e.g., dead due to
845        reboot).
846
847        timeout: timeout in seconds (default 5) to wait for master ssh
848                 connection to be established. If timeout is reached, a
849                 warning message is logged, but no other action is taken.
850        """
851        if not enable_master_ssh:
852            return
853        self._master_ssh.maybe_start(timeout=timeout)
854
855
856    def clear_known_hosts(self):
857        """Clears out the temporary ssh known_hosts file.
858
859        This is useful if the test SSHes to the machine, then reinstalls it,
860        then SSHes to it again.  It can be called after the reinstall to
861        reduce the spam in the logs.
862        """
863        logging.info("Clearing known hosts for host '%s', file '%s'.",
864                     self.host_port, self.known_hosts_file)
865        # Clear out the file by opening it for writing and then closing.
866        fh = open(self.known_hosts_file, "w")
867        fh.close()
868
869
870    def collect_logs(self, remote_src_dir, local_dest_dir, ignore_errors=True):
871        """Copy log directories from a host to a local directory.
872
873        @param remote_src_dir: A destination directory on the host.
874        @param local_dest_dir: A path to a local destination directory.
875            If it doesn't exist it will be created.
876        @param ignore_errors: If True, ignore exceptions.
877
878        @raises OSError: If there were problems creating the local_dest_dir and
879            ignore_errors is False.
880        @raises AutoservRunError, AutotestRunError: If something goes wrong
881            while copying the directories and ignore_errors is False.
882        """
883        if not self.check_cached_up_status():
884            logging.warning('Host %s did not answer to ping, skip collecting '
885                            'logs.', self.host_port)
886            return
887
888        locally_created_dest = False
889        if (not os.path.exists(local_dest_dir)
890                or not os.path.isdir(local_dest_dir)):
891            try:
892                os.makedirs(local_dest_dir)
893                locally_created_dest = True
894            except OSError as e:
895                logging.warning('Unable to collect logs from host '
896                                '%s: %s', self.host_port, e)
897                if not ignore_errors:
898                    raise
899                return
900
901        # Build test result directory summary
902        try:
903            result_tools_runner.run_on_client(self, remote_src_dir)
904        except (error.AutotestRunError, error.AutoservRunError,
905                error.AutoservSSHTimeout) as e:
906            logging.exception(
907                    'Non-critical failure: Failed to collect and throttle '
908                    'results at %s from host %s', remote_src_dir,
909                    self.host_port)
910
911        try:
912            self.get_file(remote_src_dir, local_dest_dir, safe_symlinks=True)
913        except (error.AutotestRunError, error.AutoservRunError,
914                error.AutoservSSHTimeout) as e:
915            logging.warning('Collection of %s to local dir %s from host %s '
916                            'failed: %s', remote_src_dir, local_dest_dir,
917                            self.host_port, e)
918            if locally_created_dest:
919                shutil.rmtree(local_dest_dir, ignore_errors=ignore_errors)
920            if not ignore_errors:
921                raise
922
923        # Clean up directory summary file on the client side.
924        try:
925            result_tools_runner.run_on_client(self, remote_src_dir,
926                                              cleanup_only=True)
927        except (error.AutotestRunError, error.AutoservRunError,
928                error.AutoservSSHTimeout) as e:
929            logging.exception(
930                    'Non-critical failure: Failed to cleanup result summary '
931                    'files at %s in host %s', remote_src_dir, self.hostname)
932
933
934    def create_ssh_tunnel(self, port, local_port):
935        """Create an ssh tunnel from local_port to port.
936
937        This is used to forward a port securely through a tunnel process from
938        the server to the DUT for RPC server connection.
939
940        @param port: remote port on the host.
941        @param local_port: local forwarding port.
942
943        @return: the tunnel process.
944        """
945        tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
946        ssh_cmd = self.make_ssh_command(opts=tunnel_options, port=self.port)
947        tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
948        logging.debug('Full tunnel command: %s', tunnel_cmd)
949        # Exec the ssh process directly here rather than using a shell.
950        # Using a shell leaves a dangling ssh process, because we deliver
951        # signals to the shell wrapping ssh, not the ssh process itself.
952        args = shlex.split(tunnel_cmd)
953        with open('/dev/null', 'w') as devnull:
954            tunnel_proc = subprocess.Popen(args, stdout=devnull, stderr=devnull,
955                                           close_fds=True)
956        logging.debug('Started ssh tunnel, local = %d'
957                      ' remote = %d, pid = %d',
958                      local_port, port, tunnel_proc.pid)
959        return tunnel_proc
960
961
962    def disconnect_ssh_tunnel(self, tunnel_proc):
963        """
964        Disconnects a previously forwarded port from the server to the DUT for
965        RPC server connection.
966
967        @param tunnel_proc: a tunnel process returned from |create_ssh_tunnel|.
968        """
969        if tunnel_proc.poll() is None:
970            tunnel_proc.terminate()
971            logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
972        else:
973            logging.debug('Tunnel pid %d terminated early, status %d',
974                          tunnel_proc.pid, tunnel_proc.returncode)
975
976
977    def get_os_type(self):
978        """Returns the host OS descriptor (to be implemented in subclasses).
979
980        @return A string describing the OS type.
981        """
982        raise NotImplementedError
983
984
985    def check_cached_up_status(
986            self, expiration_seconds=_DEFAULT_UP_STATUS_EXPIRATION_SECONDS):
987        """Check if the DUT responded to ping in the past `expiration_seconds`.
988
989        @param expiration_seconds: The number of seconds to keep the cached
990                status of whether the DUT responded to ping.
991        @return: True if the DUT has responded to ping during the past
992                 `expiration_seconds`.
993        """
994        # Refresh the up status if any of following conditions is true:
995        # * cached status is never set
996        # * cached status is False, so the method can check if the host is up
997        #   again.
998        # * If the cached status is older than `expiration_seconds`
999        expire_time = time.time() - expiration_seconds
1000        if (self._cached_up_status_updated is None or
1001                not self._cached_up_status or
1002                self._cached_up_status_updated < expire_time):
1003            self._cached_up_status = self.is_up_fast()
1004            self._cached_up_status_updated = time.time()
1005        return self._cached_up_status
1006