• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Lint as: python2, python3
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
5"""
6This module defines the SSHHost class.
7
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
11        SSHHost: a remote machine with a ssh access
12"""
13
14from __future__ import absolute_import
15from __future__ import division
16from __future__ import print_function
17
18import inspect
19import logging
20import re
21import time
22
23import common
24from autotest_lib.client.common_lib import error
25from autotest_lib.client.common_lib import pxssh
26from autotest_lib.server import utils
27from autotest_lib.server.hosts import abstract_ssh
28import six
29
30# In case cros_host is being ran via SSP on an older Moblab version with an
31# older chromite version.
32try:
33    from chromite.lib import metrics
34except ImportError:
35    metrics = utils.metrics_mock
36
37
38def THIS_IS_SLOW(func):
39    """Mark the given function as slow, when looking at calls to it"""
40    func.__name__ = '%s__SLOW__' % func.__name__
41    return func
42
43
44class SSHHost(abstract_ssh.AbstractSSHHost):
45    """
46    This class represents a remote machine controlled through an ssh
47    session on which you can run programs.
48
49    It is not the machine autoserv is running on. The machine must be
50    configured for password-less login, for example through public key
51    authentication.
52
53    It includes support for controlling the machine through a serial
54    console on which you can run programs. If such a serial console is
55    set up on the machine then capabilities such as hard reset and
56    boot strap monitoring are available. If the machine does not have a
57    serial console available then ordinary SSH-based commands will
58    still be available, but attempts to use extensions such as
59    console logging or hard reset will fail silently.
60
61    Implementation details:
62    This is a leaf class in an abstract class hierarchy, it must
63    implement the unimplemented methods in parent classes.
64    """
65    RUN_TIMEOUT = 3600
66
67    def _initialize(self, hostname, *args, **dargs):
68        """
69        Construct a SSHHost object
70
71        Args:
72                hostname: network hostname or address of remote machine
73        """
74        super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs)
75        self._default_run_timeout = self.RUN_TIMEOUT
76        self.setup_ssh()
77
78
79    def ssh_command(self, connect_timeout=30, options='', alive_interval=300,
80                    alive_count_max=3, connection_attempts=1):
81        """
82        Construct an ssh command with proper args for this host.
83
84        @param connect_timeout: connection timeout (in seconds)
85        @param options: SSH options
86        @param alive_interval: SSH Alive interval.
87        @param alive_count_max: SSH AliveCountMax.
88        @param connection_attempts: SSH ConnectionAttempts
89        """
90        options = " ".join([options, self._master_ssh.ssh_option])
91        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
92                                         opts=options,
93                                         hosts_file=self.known_hosts_file,
94                                         connect_timeout=connect_timeout,
95                                         alive_interval=alive_interval,
96                                         alive_count_max=alive_count_max,
97                                         connection_attempts=connection_attempts)
98        return "%s %s" % (base_cmd, self.hostname)
99
100    def _get_server_stack_state(self, lowest_frames=0, highest_frames=None):
101        """ Get the server stack frame status.
102        @param lowest_frames: the lowest frames to start printing.
103        @param highest_frames: the highest frames to print.
104                        (None means no restriction)
105        """
106        stack_frames = inspect.stack()
107        stack = ''
108        for frame in stack_frames[lowest_frames:highest_frames]:
109            function_name = inspect.getframeinfo(frame[0]).function
110            stack = '%s|%s' % (function_name, stack)
111        del stack_frames
112        return stack[:-1] # Delete the last '|' character
113
114    def _verbose_logger_command(self, command):
115        """
116        Prepend the command for the client with information about the ssh
117        command to be executed and the server stack state.
118
119        @param command: the ssh command to be executed.
120        """
121        # The last few frames on the stack are not useful, so skip them.
122        stack = self._get_server_stack_state(lowest_frames=3, highest_frames=6)
123        # If logger executable exists on the DUT, use it to report the command.
124        # Then regardless of logger, run the command as usual.
125        command = ('test -x /usr/bin/logger && /usr/bin/logger'
126                   ' -t autotest "from [%s] ssh_run: %s"; %s'
127                   % (stack, utils.sh_escape(command), command))
128        return command
129
130
131    def _run(self, command, timeout, ignore_status,
132             stdout, stderr, connect_timeout, env, options, stdin, args,
133             ignore_timeout, ssh_failure_retry_ok):
134        """Helper function for run()."""
135        if connect_timeout > timeout:
136            # timeout passed from run() may be smaller than 1, because we
137            # subtract the elapsed time from the original timeout supplied.
138            connect_timeout = max(int(timeout), 1)
139        original_cmd = command
140
141        ssh_cmd = self.ssh_command(connect_timeout, options)
142        if not env.strip():
143            env = ""
144        else:
145            env = "export %s;" % env
146        for arg in args:
147            command += ' "%s"' % utils.sh_escape(arg)
148        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))
149
150        # TODO(jrbarnette):  crbug.com/484726 - When we're in an SSP
151        # container, sometimes shortly after reboot we will see DNS
152        # resolution errors on ssh commands; the problem never
153        # occurs more than once in a row.  This especially affects
154        # the autoupdate_Rollback test, but other cases have been
155        # affected, too.
156        #
157        # We work around it by detecting the first DNS resolution error
158        # and retrying exactly one time.
159        dns_error_retry_count = 1
160
161        def counters_inc(counter_name, failure_name):
162            """Helper function to increment metrics counters.
163            @param counter_name: string indicating which counter to use
164            @param failure_name: string indentifying an error, or 'success'
165            """
166            if counter_name == 'call':
167                # ssh_counter records the outcome of each ssh invocation
168                # inside _run(), including exceptions.
169                ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls')
170                fields = {'error' : failure_name or 'success',
171                          'attempt' : ssh_call_count}
172                ssh_counter.increment(fields=fields)
173
174            if counter_name == 'run':
175                # run_counter records each call to _run() with its result
176                # and how many tries were made.  Calls are recorded when
177                # _run() exits (including exiting with an exception)
178                run_counter = metrics.Counter('chromeos/autotest/ssh/runs')
179                fields = {'error' : failure_name or 'success',
180                          'attempt' : ssh_call_count}
181                run_counter.increment(fields=fields)
182
183        # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
184        # error 255: if a simple retry doesn't work, kill the ssh master
185        # connection and try again.  (Note that either error could come from
186        # the command running in the DUT, in which case the retry may be
187        # useless but, in theory, also harmless.)
188        if ssh_failure_retry_ok:
189            # Ignore ssh command timeout, even though it could be a timeout due
190            # to the command executing in the remote host.  Note that passing
191            # ignore_timeout = True makes utils.run() return None on timeouts
192            # (and only on timeouts).
193            original_ignore_timeout = ignore_timeout
194            ignore_timeout = True
195            ssh_failure_retry_count = 2
196        else:
197            ssh_failure_retry_count = 0
198
199        ssh_call_count = 0
200
201        while True:
202            try:
203                # Increment call count first, in case utils.run() throws an
204                # exception.
205                ssh_call_count += 1
206                result = utils.run(full_cmd, timeout, True, stdout, stderr,
207                                   verbose=False, stdin=stdin,
208                                   stderr_is_expected=ignore_status,
209                                   ignore_timeout=ignore_timeout)
210            except Exception as e:
211                # No retries on exception.
212                counters_inc('call', 'exception')
213                counters_inc('run', 'exception')
214                raise e
215
216            failure_name = None
217
218            if result:
219                if result.exit_status == 255:
220                    if re.search(r'^ssh: .*: Name or service not known',
221                                 result.stderr):
222                        failure_name = 'dns_failure'
223                    else:
224                        failure_name = 'error_255'
225                elif result.exit_status > 0:
226                    failure_name = 'nonzero_status'
227            else:
228                # result == None
229                failure_name = 'timeout'
230
231            # Record the outcome of the ssh invocation.
232            counters_inc('call', failure_name)
233
234            if failure_name:
235                # There was a failure: decide whether to retry.
236                if failure_name == 'dns_failure':
237                    if dns_error_retry_count > 0:
238                        logging.debug('retrying ssh because of DNS failure')
239                        dns_error_retry_count -= 1
240                        continue
241                else:
242                    if ssh_failure_retry_count == 2:
243                        logging.debug('retrying ssh command after %s',
244                                       failure_name)
245                        ssh_failure_retry_count -= 1
246                        continue
247                    elif ssh_failure_retry_count == 1:
248                        # After two failures, restart the master connection
249                        # before the final try.
250                        logging.debug('retry 2: restarting master connection')
251                        self.restart_master_ssh()
252                        # Last retry: reinstate timeout behavior.
253                        ignore_timeout = original_ignore_timeout
254                        ssh_failure_retry_count -= 1
255                        continue
256
257            # No retry conditions occurred.  Exit the loop.
258            break
259
260        # The outcomes of ssh invocations have been recorded.  Now record
261        # the outcome of this function.
262
263        if ignore_timeout and not result:
264            counters_inc('run', 'ignored_timeout')
265            return None
266
267        # The error messages will show up in band (indistinguishable
268        # from stuff sent through the SSH connection), so we have the
269        # remote computer echo the message "Connected." before running
270        # any command.  Since the following 2 errors have to do with
271        # connecting, it's safe to do these checks.
272        if result.exit_status == 255:
273            if re.search(r'^ssh: connect to host .* port .*: '
274                         r'Connection timed out\r$', result.stderr):
275                counters_inc('run', 'final_timeout')
276                raise error.AutoservSSHTimeout(
277                        "ssh timed out: %r" % original_cmd.strip(), result)
278            if "Permission denied." in result.stderr:
279                msg = "ssh permission denied"
280                counters_inc('run', 'final_eperm')
281                raise error.AutoservSshPermissionDeniedError(msg, result)
282
283        if not ignore_status and result.exit_status > 0:
284            counters_inc('run', 'final_run_error')
285            msg = result.stderr.strip()
286            if not msg:
287                msg = result.stdout.strip()
288                if msg:
289                    msg = msg.splitlines()[-1]
290            raise error.AutoservRunError("command execution error (%d): %r" %
291                                         (result.exit_status, msg), result)
292
293        counters_inc('run', failure_name)
294        return result
295
296    def set_default_run_timeout(self, timeout):
297        """Set the default timeout for run."""
298        if timeout < 0:
299            raise error.TestError('Invalid timeout %d', timeout)
300        self._default_run_timeout = timeout
301
302    @THIS_IS_SLOW
303    def run(self, command, timeout=None, ignore_status=False,
304            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
305            connect_timeout=30, options='', stdin=None, verbose=True, args=(),
306            ignore_timeout=False, ssh_failure_retry_ok=False):
307        """
308        Run a command on the remote host.
309        @note: This RPC call has an overhead of minimum 40ms and up to 400ms on
310               servers (crbug.com/734887). Each time a call is added for
311               every job, a server core dies in the lab.
312        @see: common_lib.hosts.host.run()
313
314        @param timeout: command execution timeout in seconds. Default is
315                        _default_run_timeout (1 hour).
316        @param connect_timeout: ssh connection timeout (in seconds)
317        @param options: string with additional ssh command options
318        @param verbose: log the commands
319        @param ignore_timeout: bool True if SSH command timeouts should be
320                ignored.  Will return None on command timeout.
321        @param ssh_failure_retry_ok: True if the command may be retried on
322                probable ssh failure (error 255 or timeout).  When true,
323                the command may be executed up to three times, the second
324                time after restarting the ssh master connection.  Use only for
325                commands that are idempotent, because when a "probable
326                ssh failure" occurs, we cannot tell if the command executed
327                or not.
328
329        @raises AutoservRunError: if the command failed
330        @raises AutoservSSHTimeout: ssh connection has timed out
331        """
332        # For example if the command is a list, we need to convert it to a
333        # string first.
334        if not isinstance(command, six.string_types):
335            command = ' '.join(command)
336
337        if timeout is None:
338            timeout = self._default_run_timeout
339        start_time = time.time()
340        with metrics.SecondsTimer('chromeos/autotest/ssh/master_ssh_time',
341                                  scale=0.001):
342            if verbose:
343                stack = self._get_server_stack_state(lowest_frames=1,
344                                                     highest_frames=7)
345                logging.debug("Running (ssh) '%s' from '%s'", command, stack)
346                command = self._verbose_logger_command(command)
347
348            self.start_master_ssh(min(
349                    timeout,
350                    self.DEFAULT_START_MASTER_SSH_TIMEOUT_S,
351            ))
352
353            env = " ".join("=".join(pair) for pair in six.iteritems(self.env))
354            elapsed = time.time() - start_time
355            try:
356                return self._run(command, timeout - elapsed, ignore_status,
357                                 stdout_tee, stderr_tee, connect_timeout, env,
358                                 options, stdin, args, ignore_timeout,
359                                 ssh_failure_retry_ok)
360            except error.CmdError as cmderr:
361                # We get a CmdError here only if there is timeout of that
362                # command. Catch that and stuff it into AutoservRunError and
363                # raise it.
364                timeout_message = str('Timeout encountered: %s' %
365                                      cmderr.args[0])
366                raise error.AutoservRunError(timeout_message, cmderr.args[1])
367
368
369    def run_background(self, command, verbose=True):
370        """Start a command on the host in the background.
371
372        The command is started on the host in the background, and
373        this method call returns immediately without waiting for the
374        command's completion.  The PID of the process on the host is
375        returned as a string.
376
377        The command may redirect its stdin, stdout, or stderr as
378        necessary.  Without redirection, all input and output will
379        use /dev/null.
380
381        @param command The command to run in the background
382        @param verbose As for `self.run()`
383
384        @return Returns the PID of the remote background process
385                as a string.
386        """
387        # Redirection here isn't merely hygienic; it's a functional
388        # requirement.  sshd won't terminate until stdin, stdout,
389        # and stderr are all closed.
390        #
391        # The subshell is needed to do the right thing in case the
392        # passed in command has its own I/O redirections.
393        cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!'
394        return self.run(cmd_fmt % command, verbose=verbose).stdout
395
396
397    def run_short(self, command, **kwargs):
398        """
399        Calls the run() command with a short default timeout.
400
401        Takes the same arguments as does run(),
402        with the exception of the timeout argument which
403        here is fixed at 60 seconds.
404        It returns the result of run.
405
406        @param command: the command line string
407
408        """
409        return self.run(command, timeout=60, **kwargs)
410
411
412    def run_grep(self, command, timeout=30, ignore_status=False,
413                 stdout_ok_regexp=None, stdout_err_regexp=None,
414                 stderr_ok_regexp=None, stderr_err_regexp=None,
415                 connect_timeout=30):
416        """
417        Run a command on the remote host and look for regexp
418        in stdout or stderr to determine if the command was
419        successul or not.
420
421
422        @param command: the command line string
423        @param timeout: time limit in seconds before attempting to
424                        kill the running process. The run() function
425                        will take a few seconds longer than 'timeout'
426                        to complete if it has to kill the process.
427        @param ignore_status: do not raise an exception, no matter
428                              what the exit code of the command is.
429        @param stdout_ok_regexp: regexp that should be in stdout
430                                 if the command was successul.
431        @param stdout_err_regexp: regexp that should be in stdout
432                                  if the command failed.
433        @param stderr_ok_regexp: regexp that should be in stderr
434                                 if the command was successul.
435        @param stderr_err_regexp: regexp that should be in stderr
436                                 if the command failed.
437        @param connect_timeout: connection timeout (in seconds)
438
439        Returns:
440                if the command was successul, raises an exception
441                otherwise.
442
443        Raises:
444                AutoservRunError:
445                - the exit code of the command execution was not 0.
446                - If stderr_err_regexp is found in stderr,
447                - If stdout_err_regexp is found in stdout,
448                - If stderr_ok_regexp is not found in stderr.
449                - If stdout_ok_regexp is not found in stdout,
450        """
451
452        # We ignore the status, because we will handle it at the end.
453        result = self.run(command, timeout, ignore_status=True,
454                          connect_timeout=connect_timeout)
455
456        # Look for the patterns, in order
457        for (regexp, stream) in ((stderr_err_regexp, result.stderr),
458                                 (stdout_err_regexp, result.stdout)):
459            if regexp and stream:
460                err_re = re.compile (regexp)
461                if err_re.search(stream):
462                    raise error.AutoservRunError(
463                        '%r failed, found error pattern: %r' % (command,
464                                                                regexp), result)
465
466        for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
467                                 (stdout_ok_regexp, result.stdout)):
468            if regexp and stream:
469                ok_re = re.compile (regexp)
470                if ok_re.search(stream):
471                    if ok_re.search(stream):
472                        return
473
474        if not ignore_status and result.exit_status > 0:
475            msg = result.stderr.strip()
476            if not msg:
477                msg = result.stdout.strip()
478                if msg:
479                    msg = msg.splitlines()[-1]
480            raise error.AutoservRunError("command execution error (%d): %r" %
481                                         (result.exit_status, msg), result)
482
483
484    def setup_ssh_key(self):
485        """Setup SSH Key"""
486        logging.debug('Performing SSH key setup on %s as %s.',
487                      self.host_port, self.user)
488
489        try:
490            host = pxssh.pxssh()
491            host.login(self.hostname, self.user, self.password,
492                        port=self.port)
493            public_key = utils.get_public_key()
494
495            host.sendline('mkdir -p ~/.ssh')
496            host.prompt()
497            host.sendline('chmod 700 ~/.ssh')
498            host.prompt()
499            host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " %
500                            public_key)
501            host.prompt()
502            host.sendline('chmod 600 ~/.ssh/authorized_keys')
503            host.prompt()
504            host.logout()
505
506            logging.debug('SSH key setup complete.')
507
508        except:
509            logging.debug('SSH key setup has failed.')
510            try:
511                host.logout()
512            except:
513                pass
514
515
516    def setup_ssh(self):
517        """Setup SSH"""
518        if self.password:
519            try:
520                self.ssh_ping()
521            except error.AutoservSshPingHostError:
522                self.setup_ssh_key()
523