• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#
2# Copyright 2007 Google Inc. Released under the GPL v2
3
4"""
5This module defines the SSHHost class.
6
7Implementation details:
8You should import the "hosts" package instead of importing each type of host.
9
        SSHHost: a remote machine with ssh access
11"""
12
13import inspect
14import logging
15import re
16import time
17
18import common
19from autotest_lib.client.common_lib import error
20from autotest_lib.client.common_lib import pxssh
21from autotest_lib.server import utils
22from autotest_lib.server.hosts import abstract_ssh
23
# In case cros_host is being run via SSP on an older Moblab version with an
# older chromite version.
26try:
27    from chromite.lib import metrics
28except ImportError:
29    metrics = utils.metrics_mock
30
31
32class SSHHost(abstract_ssh.AbstractSSHHost):
33    """
34    This class represents a remote machine controlled through an ssh
35    session on which you can run programs.
36
37    It is not the machine autoserv is running on. The machine must be
38    configured for password-less login, for example through public key
39    authentication.
40
41    It includes support for controlling the machine through a serial
42    console on which you can run programs. If such a serial console is
43    set up on the machine then capabilities such as hard reset and
44    boot strap monitoring are available. If the machine does not have a
45    serial console available then ordinary SSH-based commands will
46    still be available, but attempts to use extensions such as
47    console logging or hard reset will fail silently.
48
49    Implementation details:
50    This is a leaf class in an abstract class hierarchy, it must
51    implement the unimplemented methods in parent classes.
52    """
53
54    def _initialize(self, hostname, *args, **dargs):
55        """
56        Construct a SSHHost object
57
58        Args:
59                hostname: network hostname or address of remote machine
60        """
61        super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs)
62        self.setup_ssh()
63
64
65    def ssh_command(self, connect_timeout=30, options='', alive_interval=300,
66                    alive_count_max=3, connection_attempts=1):
67        """
68        Construct an ssh command with proper args for this host.
69
70        @param connect_timeout: connection timeout (in seconds)
71        @param options: SSH options
72        @param alive_interval: SSH Alive interval.
73        @param alive_count_max: SSH AliveCountMax.
74        @param connection_attempts: SSH ConnectionAttempts
75        """
76        options = " ".join([options, self._master_ssh.ssh_option])
77        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
78                                         opts=options,
79                                         hosts_file=self.known_hosts_file,
80                                         connect_timeout=connect_timeout,
81                                         alive_interval=alive_interval,
82                                         alive_count_max=alive_count_max,
83                                         connection_attempts=connection_attempts)
84        return "%s %s" % (base_cmd, self.hostname)
85
86    def _get_server_stack_state(self, lowest_frames=0, highest_frames=None):
87        """ Get the server stack frame status.
88        @param lowest_frames: the lowest frames to start printing.
89        @param highest_frames: the highest frames to print.
90                        (None means no restriction)
91        """
92        stack_frames = inspect.stack()
93        stack = ''
94        for frame in stack_frames[lowest_frames:highest_frames]:
95            function_name = inspect.getframeinfo(frame[0]).function
96            stack = '%s|%s' % (function_name, stack)
97        del stack_frames
98        return stack[:-1] # Delete the last '|' character
99
100    def _verbose_logger_command(self, command):
101        """
102        Prepend the command for the client with information about the ssh
103        command to be executed and the server stack state.
104
105        @param command: the ssh command to be executed.
106        """
107        # The last few frames on the stack are not useful, so skip them.
108        stack = self._get_server_stack_state(lowest_frames=3, highest_frames=6)
109        # If logger executable exists on the DUT, use it to report the command.
110        # Then regardless of logger, run the command as usual.
111        command = ('test -x /usr/bin/logger && /usr/bin/logger --id=$$ '
112                   '--tag=autotest "from [%s] ssh_run: %s"; %s'
113                   % (stack, utils.sh_escape(command), command))
114        return command
115
116
117    def _run(self, command, timeout, ignore_status,
118             stdout, stderr, connect_timeout, env, options, stdin, args,
119             ignore_timeout, ssh_failure_retry_ok):
120        """Helper function for run()."""
121        if connect_timeout > timeout:
122            connect_timeout = int(timeout)
123        original_cmd = command
124
125        ssh_cmd = self.ssh_command(connect_timeout, options)
126        if not env.strip():
127            env = ""
128        else:
129            env = "export %s;" % env
130        for arg in args:
131            command += ' "%s"' % utils.sh_escape(arg)
132        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))
133
134        # TODO(jrbarnette):  crbug.com/484726 - When we're in an SSP
135        # container, sometimes shortly after reboot we will see DNS
136        # resolution errors on ssh commands; the problem never
137        # occurs more than once in a row.  This especially affects
138        # the autoupdate_Rollback test, but other cases have been
139        # affected, too.
140        #
141        # We work around it by detecting the first DNS resolution error
142        # and retrying exactly one time.
143        dns_error_retry_count = 1
144
145        def counters_inc(counter_name, failure_name):
146            """Helper function to increment metrics counters.
147            @param counter_name: string indicating which counter to use
148            @param failure_name: string indentifying an error, or 'success'
149            """
150            if counter_name == 'call':
151                # ssh_counter records the outcome of each ssh invocation
152                # inside _run(), including exceptions.
153                ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls')
154                fields = {'error' : failure_name or 'success',
155                          'attempt' : ssh_call_count}
156                ssh_counter.increment(fields=fields)
157
158            if counter_name == 'run':
159                # run_counter records each call to _run() with its result
160                # and how many tries were made.  Calls are recorded when
161                # _run() exits (including exiting with an exception)
162                run_counter = metrics.Counter('chromeos/autotest/ssh/runs')
163                fields = {'error' : failure_name or 'success',
164                          'attempt' : ssh_call_count}
165                run_counter.increment(fields=fields)
166
167        # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
168        # error 255: if a simple retry doesn't work, kill the ssh master
169        # connection and try again.  (Note that either error could come from
170        # the command running in the DUT, in which case the retry may be
171        # useless but, in theory, also harmless.)
172        if ssh_failure_retry_ok:
173            # Ignore ssh command timeout, even though it could be a timeout due
174            # to the command executing in the remote host.  Note that passing
175            # ignore_timeout = True makes utils.run() return None on timeouts
176            # (and only on timeouts).
177            original_ignore_timeout = ignore_timeout
178            ignore_timeout = True
179            ssh_failure_retry_count = 2
180        else:
181            ssh_failure_retry_count = 0
182
183        ssh_call_count = 0
184
185        while True:
186            try:
187                # Increment call count first, in case utils.run() throws an
188                # exception.
189                ssh_call_count += 1
190                result = utils.run(full_cmd, timeout, True, stdout, stderr,
191                                   verbose=False, stdin=stdin,
192                                   stderr_is_expected=ignore_status,
193                                   ignore_timeout=ignore_timeout)
194            except Exception as e:
195                # No retries on exception.
196                counters_inc('call', 'exception')
197                counters_inc('run', 'exception')
198                raise e
199
200            failure_name = None
201
202            if result:
203                if result.exit_status == 255:
204                    if re.search(r'^ssh: .*: Name or service not known',
205                                 result.stderr):
206                        failure_name = 'dns_failure'
207                    else:
208                        failure_name = 'error_255'
209                elif result.exit_status > 0:
210                    failure_name = 'nonzero_status'
211            else:
212                # result == None
213                failure_name = 'timeout'
214
215            # Record the outcome of the ssh invocation.
216            counters_inc('call', failure_name)
217
218            if failure_name:
219                # There was a failure: decide whether to retry.
220                if failure_name == 'dns_failure':
221                    if dns_error_retry_count > 0:
222                        logging.debug('retrying ssh because of DNS failure')
223                        dns_error_retry_count -= 1
224                        continue
225                else:
226                    if ssh_failure_retry_count == 2:
227                        logging.debug('retrying ssh command after %s',
228                                       failure_name)
229                        ssh_failure_retry_count -= 1
230                        continue
231                    elif ssh_failure_retry_count == 1:
232                        # After two failures, restart the master connection
233                        # before the final try.
234                        logging.debug('retry 2: restarting master connection')
235                        self.restart_master_ssh()
236                        # Last retry: reinstate timeout behavior.
237                        ignore_timeout = original_ignore_timeout
238                        ssh_failure_retry_count -= 1
239                        continue
240
241            # No retry conditions occurred.  Exit the loop.
242            break
243
244        # The outcomes of ssh invocations have been recorded.  Now record
245        # the outcome of this function.
246
247        if ignore_timeout and not result:
248            counters_inc('run', 'ignored_timeout')
249            return None
250
251        # The error messages will show up in band (indistinguishable
252        # from stuff sent through the SSH connection), so we have the
253        # remote computer echo the message "Connected." before running
254        # any command.  Since the following 2 errors have to do with
255        # connecting, it's safe to do these checks.
256        if result.exit_status == 255:
257            if re.search(r'^ssh: connect to host .* port .*: '
258                         r'Connection timed out\r$', result.stderr):
259                counters_inc('run', 'final_timeout')
260                raise error.AutoservSSHTimeout(
261                        "ssh timed out: %s" % original_cmd.strip(), result)
262            if "Permission denied." in result.stderr:
263                msg = "ssh permission denied"
264                counters_inc('run', 'final_eperm')
265                raise error.AutoservSshPermissionDeniedError(msg, result)
266
267        if not ignore_status and result.exit_status > 0:
268            counters_inc('run', 'final_run_error')
269            msg = result.stderr.strip()
270            if not msg:
271                msg = result.stdout.strip()
272                if msg:
273                    msg = msg.splitlines()[-1]
274            raise error.AutoservRunError("command execution error (%d): %s" %
275                                         (result.exit_status, msg), result)
276
277        counters_inc('run', failure_name)
278        return result
279
280
281    def run_very_slowly(self, command, timeout=None, ignore_status=False,
282            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
283            connect_timeout=30, options='', stdin=None, verbose=True, args=(),
284            ignore_timeout=False, ssh_failure_retry_ok=False):
285        """
286        Run a command on the remote host.
287        @note: This RPC call has an overhead of minimum 40ms and up to 400ms on
288               servers (crbug.com/734887). Each time a call is added for
289               every job, a server core dies in the lab.
290        @see: common_lib.hosts.host.run()
291
292        @param timeout: command execution timeout in seconds. Default is 1 hour.
293        @param connect_timeout: ssh connection timeout (in seconds)
294        @param options: string with additional ssh command options
295        @param verbose: log the commands
296        @param ignore_timeout: bool True if SSH command timeouts should be
297                ignored.  Will return None on command timeout.
298        @param ssh_failure_retry_ok: True if the command may be retried on
299                probable ssh failure (error 255 or timeout).  When true,
300                the command may be executed up to three times, the second
301                time after restarting the ssh master connection.  Use only for
302                commands that are idempotent, because when a "probable
303                ssh failure" occurs, we cannot tell if the command executed
304                or not.
305
306        @raises AutoservRunError: if the command failed
307        @raises AutoservSSHTimeout: ssh connection has timed out
308        """
309        if timeout is None:
310            timeout = 3600
311        start_time = time.time()
312        with metrics.SecondsTimer('chromeos/autotest/ssh/master_ssh_time',
313                                  scale=0.001):
314            if verbose:
315                stack = self._get_server_stack_state(lowest_frames=1,
316                                                     highest_frames=7)
317                logging.debug("Running (ssh) '%s' from '%s'", command, stack)
318                command = self._verbose_logger_command(command)
319
320            self.start_master_ssh(min(
321                    timeout,
322                    self.DEFAULT_START_MASTER_SSH_TIMEOUT_S,
323            ))
324
325            env = " ".join("=".join(pair) for pair in self.env.iteritems())
326            elapsed = time.time() - start_time
327            try:
328                return self._run(command, timeout - elapsed, ignore_status,
329                                 stdout_tee, stderr_tee, connect_timeout, env,
330                                 options, stdin, args, ignore_timeout,
331                                 ssh_failure_retry_ok)
332            except error.CmdError, cmderr:
333                # We get a CmdError here only if there is timeout of that
334                # command. Catch that and stuff it into AutoservRunError and
335                # raise it.
336                timeout_message = str('Timeout encountered: %s' %
337                                      cmderr.args[0])
338                raise error.AutoservRunError(timeout_message, cmderr.args[1])
339
340
    # Expose run_very_slowly() under the name `run`; the other methods of
    # this class (run_background, run_short, run_grep) call self.run().
    run = run_very_slowly
342
343
344    def run_background(self, command, verbose=True):
345        """Start a command on the host in the background.
346
347        The command is started on the host in the background, and
348        this method call returns immediately without waiting for the
349        command's completion.  The PID of the process on the host is
350        returned as a string.
351
352        The command may redirect its stdin, stdout, or stderr as
353        necessary.  Without redirection, all input and output will
354        use /dev/null.
355
356        @param command The command to run in the background
357        @param verbose As for `self.run()`
358
359        @return Returns the PID of the remote background process
360                as a string.
361        """
362        # Redirection here isn't merely hygienic; it's a functional
363        # requirement.  sshd won't terminate until stdin, stdout,
364        # and stderr are all closed.
365        #
366        # The subshell is needed to do the right thing in case the
367        # passed in command has its own I/O redirections.
368        cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!'
369        return self.run(cmd_fmt % command, verbose=verbose).stdout
370
371
372    def run_short(self, command, **kwargs):
373        """
374        Calls the run() command with a short default timeout.
375
376        Takes the same arguments as does run(),
377        with the exception of the timeout argument which
378        here is fixed at 60 seconds.
379        It returns the result of run.
380
381        @param command: the command line string
382
383        """
384        return self.run(command, timeout=60, **kwargs)
385
386
387    def run_grep(self, command, timeout=30, ignore_status=False,
388                 stdout_ok_regexp=None, stdout_err_regexp=None,
389                 stderr_ok_regexp=None, stderr_err_regexp=None,
390                 connect_timeout=30):
391        """
392        Run a command on the remote host and look for regexp
393        in stdout or stderr to determine if the command was
394        successul or not.
395
396
397        @param command: the command line string
398        @param timeout: time limit in seconds before attempting to
399                        kill the running process. The run() function
400                        will take a few seconds longer than 'timeout'
401                        to complete if it has to kill the process.
402        @param ignore_status: do not raise an exception, no matter
403                              what the exit code of the command is.
404        @param stdout_ok_regexp: regexp that should be in stdout
405                                 if the command was successul.
406        @param stdout_err_regexp: regexp that should be in stdout
407                                  if the command failed.
408        @param stderr_ok_regexp: regexp that should be in stderr
409                                 if the command was successul.
410        @param stderr_err_regexp: regexp that should be in stderr
411                                 if the command failed.
412        @param connect_timeout: connection timeout (in seconds)
413
414        Returns:
415                if the command was successul, raises an exception
416                otherwise.
417
418        Raises:
419                AutoservRunError:
420                - the exit code of the command execution was not 0.
421                - If stderr_err_regexp is found in stderr,
422                - If stdout_err_regexp is found in stdout,
423                - If stderr_ok_regexp is not found in stderr.
424                - If stdout_ok_regexp is not found in stdout,
425        """
426
427        # We ignore the status, because we will handle it at the end.
428        result = self.run(command, timeout, ignore_status=True,
429                          connect_timeout=connect_timeout)
430
431        # Look for the patterns, in order
432        for (regexp, stream) in ((stderr_err_regexp, result.stderr),
433                                 (stdout_err_regexp, result.stdout)):
434            if regexp and stream:
435                err_re = re.compile (regexp)
436                if err_re.search(stream):
437                    raise error.AutoservRunError(
438                        '%s failed, found error pattern: "%s"' % (command,
439                                                                regexp), result)
440
441        for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
442                                 (stdout_ok_regexp, result.stdout)):
443            if regexp and stream:
444                ok_re = re.compile (regexp)
445                if ok_re.search(stream):
446                    if ok_re.search(stream):
447                        return
448
449        if not ignore_status and result.exit_status > 0:
450            msg = result.stderr.strip()
451            if not msg:
452                msg = result.stdout.strip()
453                if msg:
454                    msg = msg.splitlines()[-1]
455            raise error.AutoservRunError("command execution error (%d): %s" %
456                                         (result.exit_status, msg), result)
457
458
459    def setup_ssh_key(self):
460        """Setup SSH Key"""
461        logging.debug('Performing SSH key setup on %s as %s.',
462                      self.host_port, self.user)
463
464        try:
465            host = pxssh.pxssh()
466            host.login(self.hostname, self.user, self.password,
467                        port=self.port)
468            public_key = utils.get_public_key()
469
470            host.sendline('mkdir -p ~/.ssh')
471            host.prompt()
472            host.sendline('chmod 700 ~/.ssh')
473            host.prompt()
474            host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " %
475                            public_key)
476            host.prompt()
477            host.sendline('chmod 600 ~/.ssh/authorized_keys')
478            host.prompt()
479            host.logout()
480
481            logging.debug('SSH key setup complete.')
482
483        except:
484            logging.debug('SSH key setup has failed.')
485            try:
486                host.logout()
487            except:
488                pass
489
490
491    def setup_ssh(self):
492        """Setup SSH"""
493        if self.password:
494            try:
495                self.ssh_ping()
496            except error.AutoservSshPingHostError:
497                self.setup_ssh_key()
498