• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Lint as: python2, python3
2#
3# Copyright 2007 Google Inc. Released under the GPL v2
4
5"""
6This module defines the SSHHost class.
7
8Implementation details:
9You should import the "hosts" package instead of importing each type of host.
10
        SSHHost: a remote machine with SSH access
12"""
13
14from __future__ import absolute_import
15from __future__ import division
16from __future__ import print_function
17
18import inspect
19import logging
20import re
21import time
22
23import common
24from autotest_lib.client.common_lib import error
25from autotest_lib.client.common_lib import pxssh
26from autotest_lib.server import utils
27from autotest_lib.server.hosts import abstract_ssh
28import six
29
# In case cros_host is being run via SSP on an older Moblab version with an
# older chromite version.
32try:
33    from autotest_lib.utils.frozen_chromite.lib import metrics
34except ImportError:
35    metrics = utils.metrics_mock
36
37
def THIS_IS_SLOW(func):
    """Tag the given function as slow so calls to it stand out.

    The function's __name__ gets a '__SLOW__' suffix; the same function
    object is returned, so this can be applied as a decorator.
    """
    slow_name = '%s__SLOW__' % func.__name__
    setattr(func, '__name__', slow_name)
    return func
42
43
class SSHHost(abstract_ssh.AbstractSSHHost):
    """
    This class represents a remote machine controlled through an ssh
    session on which you can run programs.

    It is not the machine autoserv is running on. The machine must be
    configured for password-less login, for example through public key
    authentication.

    It includes support for controlling the machine through a serial
    console on which you can run programs. If such a serial console is
    set up on the machine then capabilities such as hard reset and
    boot strap monitoring are available. If the machine does not have a
    serial console available then ordinary SSH-based commands will
    still be available, but attempts to use extensions such as
    console logging or hard reset will fail silently.

    Implementation details:
    This is a leaf class in an abstract class hierarchy, it must
    implement the unimplemented methods in parent classes.
    """
    # Timeout, in seconds, that _initialize() installs as the default for
    # run() when the caller does not pass an explicit timeout.
    RUN_TIMEOUT = 3600
66
67    def _initialize(self, hostname, *args, **dargs):
68        """
69        Construct a SSHHost object
70
71        Args:
72                hostname: network hostname or address of remote machine
73        """
74        super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs)
75        self._default_run_timeout = self.RUN_TIMEOUT
76        self.setup_ssh()
77
78
79    def ssh_command(self, connect_timeout=30, options='', alive_interval=300,
80                    alive_count_max=3, connection_attempts=1):
81        """
82        Construct an ssh command with proper args for this host.
83
84        @param connect_timeout: connection timeout (in seconds)
85        @param options: SSH options
86        @param alive_interval: SSH Alive interval.
87        @param alive_count_max: SSH AliveCountMax.
88        @param connection_attempts: SSH ConnectionAttempts
89        """
90        options = " ".join([options, self._main_ssh.ssh_option])
91        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
92                                         opts=options,
93                                         hosts_file=self.known_hosts_file,
94                                         connect_timeout=connect_timeout,
95                                         alive_interval=alive_interval,
96                                         alive_count_max=alive_count_max,
97                                         connection_attempts=connection_attempts)
98        return "%s %s" % (base_cmd, self.hostname)
99
100    def _get_server_stack_state(self, lowest_frames=0, highest_frames=None):
101        """ Get the server stack frame status.
102        @param lowest_frames: the lowest frames to start printing.
103        @param highest_frames: the highest frames to print.
104                        (None means no restriction)
105        """
106        stack_frames = inspect.stack()
107        stack = ''
108        for frame in stack_frames[lowest_frames:highest_frames]:
109            function_name = inspect.getframeinfo(frame[0]).function
110            stack = '%s|%s' % (function_name, stack)
111        del stack_frames
112        return stack[:-1] # Delete the last '|' character
113
114    def _verbose_logger_command(self, command):
115        """
116        Prepend the command for the client with information about the ssh
117        command to be executed and the server stack state.
118
119        @param command: the ssh command to be executed.
120        """
121        # The last few frames on the stack are not useful, so skip them.
122        stack = self._get_server_stack_state(lowest_frames=3, highest_frames=6)
123        # If logger executable exists on the DUT, use it to report the command.
124        # Then regardless of logger, run the command as usual.
125        command = ('test -x /usr/bin/logger && /usr/bin/logger'
126                   ' -t autotest "from [%s] ssh_run: %s"; %s'
127                   % (stack, utils.sh_escape(command), command))
128        return command
129
130    def _tls_run(self, original_cmd, timeout, ignore_status, stdout, stderr,
131                 args, ignore_timeout):
132        """Helper function for run(), uses the tls client."""
133        if not self.tls_connection.alive:
134            raise error.TLSConnectionError("TLS not connected.")
135        original_cmd = ' '.join([original_cmd] +
136                                [utils.sh_quote_word(arg) for arg in args])
137
138        try:
139            result = self.tls_exec_dut_command_client.run_cmd(original_cmd, timeout,
140                                                       stdout, stderr,
141                                                       ignore_timeout)
142        except Exception as e:
143            logging.warning("TLS Client run err %s", e)
144            raise e
145
146        if not ignore_status and result.exit_status > 0:
147            msg = result.stderr.strip()
148            if not msg:
149                msg = result.stdout.strip()
150                if msg:
151                    msg = msg.splitlines()[-1]
152            raise error.AutoservRunError(
153                    "command execution error using TLS (%d): %s" %
154                    (result.exit_status, msg), result)
155
156        return result
157
    def _run(self, command, timeout, ignore_status, stdout, stderr,
             connect_timeout, env, options, stdin, args, ignore_timeout,
             ssh_failure_retry_ok, verbose):
        """Helper function for run().

        Runs the command through the TLS client when one is available and
        not marked unstable; otherwise (or after an unexpected TLS failure)
        builds a full ssh command line and runs it with utils.run(),
        optionally retrying, and records metrics counters for each ssh
        invocation and for the overall outcome.

        @param command: command string to run on the remote host.
        @param timeout: timeout handed to utils.run() / the TLS client.
        @param ignore_status: when False, an exit status above 0 raises
                AutoservRunError.
        @param stdout: stdout argument handed to utils.run() / TLS client.
        @param stderr: stderr argument handed to utils.run() / TLS client.
        @param connect_timeout: ssh connection timeout; lowered to
                max(int(timeout), 1) when it exceeds timeout.
        @param env: environment assignments string; when not blank it is
                exported in front of the command.
        @param options: additional ssh options handed to ssh_command().
        @param stdin: stdin argument handed to utils.run().
        @param args: extra arguments, each escaped and appended to the
                command in double quotes.
        @param ignore_timeout: when True, a timeout returns None instead of
                raising.
        @param ssh_failure_retry_ok: when True, the ssh command is retried
                up to twice on timeout or non-DNS failure; the main ssh
                connection is restarted before the last try.
        @param verbose: when True, log the command and wrap it with
                _verbose_logger_command().

        @return: the result of the command, or None on an ignored timeout.

        @raises error.AutoservSshDnsError: ssh could not resolve the host.
        @raises error.AutoservSSHTimeout: the ssh connection timed out.
        @raises error.AutoservSshPermissionDeniedError: ssh reported
                "Permission denied.".
        @raises error.AutoservRunError: the command exited with a status
                above 0 and ignore_status is False.
        """
        if connect_timeout > timeout:
            # timeout passed from run() may be smaller than 1, because we
            # subtract the elapsed time from the original timeout supplied.
            connect_timeout = max(int(timeout), 1)
        # Keep the unwrapped command for the timeout error message below.
        original_cmd = command

        # If TLS client has been built, and not marked as unstable, use it.
        # NOTE: if the tls_enabled setting in the config is not True, the
        # client will not have been built.
        use_tls = self.tls_exec_dut_command_client and not self.tls_unstable

        if verbose:
            # Frame 0 is _get_server_stack_state() and frame 1 is _run(),
            # so the report starts at the caller of _run().
            stack = self._get_server_stack_state(lowest_frames=2,
                                                 highest_frames=8)

            logging.debug("Running (via %s) '%s' from '%s'",
                          'TLS' if use_tls else 'SSH', command, stack)
            command = self._verbose_logger_command(command)

        if use_tls:
            try:
                return self._tls_run(command, timeout, ignore_status, stdout,
                                     stderr, args, ignore_timeout)
            except (error.AutoservRunError, error.CmdTimeoutError) as e:
                # These are propagated to the caller rather than triggering
                # the ssh fallback.
                raise e
            except Exception as e:
                # If TLS fails for unknown reason, we will revert to normal ssh.
                logging.warning(
                        "Unexpected TLS cmd failed. Reverting to SSH.\n %s", e)

                # Note the TLS as unstable so we do not attempt to re-start it.
                self.tls_unstable = True

        ssh_cmd = self.ssh_command(connect_timeout, options)
        if not env.strip():
            env = ""
        else:
            env = "export %s;" % env
        for arg in args:
            command += ' "%s"' % utils.sh_escape(arg)
        # The whole remote command (env exports included) is passed to ssh
        # as one double-quoted argument.
        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))

        def counters_inc(counter_name, failure_name):
            """Helper function to increment metrics counters.
            @param counter_name: string indicating which counter to use
            @param failure_name: string indentifying an error, or 'success'
            """
            # ssh_call_count is read from the enclosing scope; it is
            # assigned below, before the first call to this helper.
            if counter_name == 'call':
                # ssh_counter records the outcome of each ssh invocation
                # inside _run(), including exceptions.
                ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls')
                fields = {'error' : failure_name or 'success',
                          'attempt' : ssh_call_count}
                ssh_counter.increment(fields=fields)

            if counter_name == 'run':
                # run_counter records each call to _run() with its result
                # and how many tries were made.  Calls are recorded when
                # _run() exits (including exiting with an exception)
                run_counter = metrics.Counter('chromeos/autotest/ssh/runs')
                fields = {'error' : failure_name or 'success',
                          'attempt' : ssh_call_count}
                run_counter.increment(fields=fields)

        # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
        # error 255: if a simple retry doesn't work, kill the ssh main
        # connection and try again.  (Note that either error could come from
        # the command running in the DUT, in which case the retry may be
        # useless but, in theory, also harmless.)
        if ssh_failure_retry_ok:
            # Ignore ssh command timeout, even though it could be a timeout due
            # to the command executing in the remote host.  Note that passing
            # ignore_timeout = True makes utils.run() return None on timeouts
            # (and only on timeouts).
            original_ignore_timeout = ignore_timeout
            ignore_timeout = True
            ssh_failure_retry_count = 2
        else:
            ssh_failure_retry_count = 0

        ssh_call_count = 0

        while True:
            try:
                # Increment call count first, in case utils.run() throws an
                # exception.
                ssh_call_count += 1
                # The third positional argument (True) makes utils.run()
                # ignore the exit status; it is examined below instead.
                result = utils.run(full_cmd, timeout, True, stdout, stderr,
                                   verbose=False, stdin=stdin,
                                   stderr_is_expected=ignore_status,
                                   ignore_timeout=ignore_timeout)
            except Exception as e:
                # No retries on exception.
                counters_inc('call', 'exception')
                counters_inc('run', 'exception')
                raise e

            failure_name = None

            if result:
                if result.exit_status == 255:
                    if re.search(r'^ssh: .*: Name or service not known',
                                 result.stderr):
                        failure_name = 'dns_failure'
                    else:
                        failure_name = 'error_255'
                elif result.exit_status > 0:
                    failure_name = 'nonzero_status'
            else:
                # result == None
                failure_name = 'timeout'

            # Record the outcome of the ssh invocation.
            counters_inc('call', failure_name)

            if failure_name:
                # There was a failure: decide whether to retry.
                if failure_name == 'dns_failure':
                    # NOTE(review): this exit records no 'run' counter,
                    # unlike the other exits of _run() -- confirm whether
                    # that is intentional.
                    raise error.AutoservSshDnsError("DNS Failure: ", result)
                else:
                    if ssh_failure_retry_count == 2:
                        logging.debug('retrying ssh command after %s',
                                       failure_name)
                        ssh_failure_retry_count -= 1
                        continue
                    elif ssh_failure_retry_count == 1:
                        # After two failures, restart the main connection
                        # before the final try.
                        stack = self._get_server_stack_state(lowest_frames=1,
                                                             highest_frames=7)
                        logging.debug(
                                'retry 2: restarting main connection from \'%s\'',
                                stack)
                        self.restart_main_ssh()
                        # Last retry: reinstate timeout behavior.
                        ignore_timeout = original_ignore_timeout
                        ssh_failure_retry_count -= 1
                        continue

            # No retry conditions occurred.  Exit the loop.
            break

        # The outcomes of ssh invocations have been recorded.  Now record
        # the outcome of this function.

        if ignore_timeout and not result:
            counters_inc('run', 'ignored_timeout')
            return None

        # The error messages will show up in band (indistinguishable
        # from stuff sent through the SSH connection), so we have the
        # remote computer echo the message "Connected." before running
        # any command.  Since the following 2 errors have to do with
        # connecting, it's safe to do these checks.
        if result.exit_status == 255:
            if re.search(r'^ssh: connect to host .* port .*: '
                         r'Connection timed out\r$', result.stderr):
                counters_inc('run', 'final_timeout')
                raise error.AutoservSSHTimeout(
                        "ssh timed out: %r" % original_cmd.strip(), result)
            if "Permission denied." in result.stderr:
                msg = "ssh permission denied"
                counters_inc('run', 'final_eperm')
                raise error.AutoservSshPermissionDeniedError(msg, result)

        if not ignore_status and result.exit_status > 0:
            counters_inc('run', 'final_run_error')
            # Prefer stderr for the message; otherwise fall back to the
            # last line of stdout.
            msg = result.stderr.strip()
            if not msg:
                msg = result.stdout.strip()
                if msg:
                    msg = msg.splitlines()[-1]
            raise error.AutoservRunError("command execution error (%d): %r" %
                                         (result.exit_status, msg), result)

        # failure_name is None on success; with ignore_status it may still
        # name a non-fatal failure such as 'nonzero_status'.
        counters_inc('run', failure_name)
        return result
339
340    def set_default_run_timeout(self, timeout):
341        """Set the default timeout for run."""
342        if timeout < 0:
343            raise error.TestError('Invalid timeout %d', timeout)
344        self._default_run_timeout = timeout
345
346    @THIS_IS_SLOW
347    def run(self, command, timeout=None, ignore_status=False,
348            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
349            connect_timeout=30, options='', stdin=None, verbose=True, args=(),
350            ignore_timeout=False, ssh_failure_retry_ok=False):
351        """
352        Run a command on the remote host.
353        @note: This RPC call has an overhead of minimum 40ms and up to 400ms on
354               servers (crbug.com/734887). Each time a call is added for
355               every job, a server core dies in the lab.
356        @see: common_lib.hosts.host.run()
357
358        @param timeout: command execution timeout in seconds. Default is
359                        _default_run_timeout (1 hour).
360        @param connect_timeout: ssh connection timeout (in seconds)
361        @param options: string with additional ssh command options
362        @param verbose: log the commands
363        @param ignore_timeout: bool True if SSH command timeouts should be
364                ignored.  Will return None on command timeout.
365        @param ssh_failure_retry_ok: True if the command may be retried on
366                probable ssh failure (error 255 or timeout).  When true,
367                the command may be executed up to three times, the second
368                time after restarting the ssh main connection.  Use only for
369                commands that are idempotent, because when a "probable
370                ssh failure" occurs, we cannot tell if the command executed
371                or not.
372
373        @raises AutoservRunError: if the command failed
374        @raises AutoservSSHTimeout: ssh connection has timed out
375        """
376        # For example if the command is a list, we need to convert it to a
377        # string first.
378        if not isinstance(command, six.string_types):
379            command = ' '.join(command)
380
381        if timeout is None:
382            timeout = self._default_run_timeout
383        start_time = time.time()
384        with metrics.SecondsTimer('chromeos/autotest/ssh/main_ssh_time',
385                                  scale=0.001):
386
387            self.start_main_ssh(min(
388                    timeout,
389                    self.DEFAULT_START_MAIN_SSH_TIMEOUT_S,
390            ))
391
392            env = " ".join("=".join(pair) for pair in six.iteritems(self.env))
393            elapsed = time.time() - start_time
394            try:
395                return self._run(command, timeout - elapsed, ignore_status,
396                                 stdout_tee, stderr_tee, connect_timeout, env,
397                                 options, stdin, args, ignore_timeout,
398                                 ssh_failure_retry_ok, verbose)
399            except error.CmdError as cmderr:
400                # We get a CmdError here only if there is timeout of that
401                # command. Catch that and stuff it into AutoservRunError and
402                # raise it.
403                timeout_message = str('Timeout encountered: %s' %
404                                      cmderr.args[0])
405                raise error.AutoservRunError(timeout_message, cmderr.args[1])
406
407
408    def run_background(self, command, verbose=True):
409        """Start a command on the host in the background.
410
411        The command is started on the host in the background, and
412        this method call returns immediately without waiting for the
413        command's completion.  The PID of the process on the host is
414        returned as a string.
415
416        The command may redirect its stdin, stdout, or stderr as
417        necessary.  Without redirection, all input and output will
418        use /dev/null.
419
420        @param command The command to run in the background
421        @param verbose As for `self.run()`
422
423        @return Returns the PID of the remote background process
424                as a string.
425        """
426        # Redirection here isn't merely hygienic; it's a functional
427        # requirement.  sshd won't terminate until stdin, stdout,
428        # and stderr are all closed.
429        #
430        # The subshell is needed to do the right thing in case the
431        # passed in command has its own I/O redirections.
432        cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!'
433        return self.run(cmd_fmt % command, verbose=verbose).stdout
434
435
436    def run_short(self, command, **kwargs):
437        """
438        Calls the run() command with a short default timeout.
439
440        Takes the same arguments as does run(),
441        with the exception of the timeout argument which
442        here is fixed at 60 seconds.
443        It returns the result of run.
444
445        @param command: the command line string
446
447        """
448        return self.run(command, timeout=60, **kwargs)
449
450
451    def run_grep(self, command, timeout=30, ignore_status=False,
452                 stdout_ok_regexp=None, stdout_err_regexp=None,
453                 stderr_ok_regexp=None, stderr_err_regexp=None,
454                 connect_timeout=30):
455        """
456        Run a command on the remote host and look for regexp
457        in stdout or stderr to determine if the command was
458        successul or not.
459
460
461        @param command: the command line string
462        @param timeout: time limit in seconds before attempting to
463                        kill the running process. The run() function
464                        will take a few seconds longer than 'timeout'
465                        to complete if it has to kill the process.
466        @param ignore_status: do not raise an exception, no matter
467                              what the exit code of the command is.
468        @param stdout_ok_regexp: regexp that should be in stdout
469                                 if the command was successul.
470        @param stdout_err_regexp: regexp that should be in stdout
471                                  if the command failed.
472        @param stderr_ok_regexp: regexp that should be in stderr
473                                 if the command was successul.
474        @param stderr_err_regexp: regexp that should be in stderr
475                                 if the command failed.
476        @param connect_timeout: connection timeout (in seconds)
477
478        Returns:
479                if the command was successul, raises an exception
480                otherwise.
481
482        Raises:
483                AutoservRunError:
484                - the exit code of the command execution was not 0.
485                - If stderr_err_regexp is found in stderr,
486                - If stdout_err_regexp is found in stdout,
487                - If stderr_ok_regexp is not found in stderr.
488                - If stdout_ok_regexp is not found in stdout,
489        """
490
491        # We ignore the status, because we will handle it at the end.
492        result = self.run(command, timeout, ignore_status=True,
493                          connect_timeout=connect_timeout)
494
495        # Look for the patterns, in order
496        for (regexp, stream) in ((stderr_err_regexp, result.stderr),
497                                 (stdout_err_regexp, result.stdout)):
498            if regexp and stream:
499                err_re = re.compile (regexp)
500                if err_re.search(stream):
501                    raise error.AutoservRunError(
502                        '%r failed, found error pattern: %r' % (command,
503                                                                regexp), result)
504
505        for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
506                                 (stdout_ok_regexp, result.stdout)):
507            if regexp and stream:
508                ok_re = re.compile (regexp)
509                if ok_re.search(stream):
510                    if ok_re.search(stream):
511                        return
512
513        if not ignore_status and result.exit_status > 0:
514            msg = result.stderr.strip()
515            if not msg:
516                msg = result.stdout.strip()
517                if msg:
518                    msg = msg.splitlines()[-1]
519            raise error.AutoservRunError("command execution error (%d): %r" %
520                                         (result.exit_status, msg), result)
521
522
523    def setup_ssh_key(self):
524        """Setup SSH Key"""
525        logging.debug('Performing SSH key setup on %s as %s.',
526                      self.host_port, self.user)
527
528        try:
529            host = pxssh.pxssh()
530            host.login(self.hostname, self.user, self.password,
531                        port=self.port)
532            public_key = utils.get_public_key()
533
534            host.sendline('mkdir -p ~/.ssh')
535            host.prompt()
536            host.sendline('chmod 700 ~/.ssh')
537            host.prompt()
538            host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " %
539                            public_key)
540            host.prompt()
541            host.sendline('chmod 600 ~/.ssh/authorized_keys')
542            host.prompt()
543            host.logout()
544
545            logging.debug('SSH key setup complete.')
546
547        except:
548            logging.debug('SSH key setup has failed.')
549            try:
550                host.logout()
551            except:
552                pass
553
554
555    def setup_ssh(self):
556        """Setup SSH"""
557        if self.password:
558            try:
559                self.ssh_ping()
560            except error.AutoservSshPingHostError:
561                self.setup_ssh_key()
562