• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#
2# Copyright 2007 Google Inc. Released under the GPL v2
3
4"""
5This module defines the SSHHost class.
6
7Implementation details:
8You should import the "hosts" package instead of importing each type of host.
9
10        SSHHost: a remote machine with a ssh access
11"""
12
13import inspect
14import logging
15import re
16from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import pxssh
18from autotest_lib.server import utils
19from autotest_lib.server.hosts import abstract_ssh
20
21# In case cros_host is being ran via SSP on an older Moblab version with an
22# older chromite version.
23try:
24    from chromite.lib import metrics
25except ImportError:
26    metrics = utils.metrics_mock
27
28
29class SSHHost(abstract_ssh.AbstractSSHHost):
30    """
31    This class represents a remote machine controlled through an ssh
32    session on which you can run programs.
33
34    It is not the machine autoserv is running on. The machine must be
35    configured for password-less login, for example through public key
36    authentication.
37
38    It includes support for controlling the machine through a serial
39    console on which you can run programs. If such a serial console is
40    set up on the machine then capabilities such as hard reset and
41    boot strap monitoring are available. If the machine does not have a
42    serial console available then ordinary SSH-based commands will
43    still be available, but attempts to use extensions such as
44    console logging or hard reset will fail silently.
45
46    Implementation details:
47    This is a leaf class in an abstract class hierarchy, it must
48    implement the unimplemented methods in parent classes.
49    """
50
51    def _initialize(self, hostname, *args, **dargs):
52        """
53        Construct a SSHHost object
54
55        Args:
56                hostname: network hostname or address of remote machine
57        """
58        super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs)
59        self.setup_ssh()
60
61
62    def ssh_command(self, connect_timeout=30, options='', alive_interval=300,
63                    alive_count_max=3, connection_attempts=1):
64        """
65        Construct an ssh command with proper args for this host.
66
67        @param connect_timeout: connection timeout (in seconds)
68        @param options: SSH options
69        @param alive_interval: SSH Alive interval.
70        @param alive_count_max: SSH AliveCountMax.
71        @param connection_attempts: SSH ConnectionAttempts
72        """
73        options = " ".join([options, self._master_ssh.ssh_option])
74        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
75                                         opts=options,
76                                         hosts_file=self.known_hosts_file,
77                                         connect_timeout=connect_timeout,
78                                         alive_interval=alive_interval,
79                                         alive_count_max=alive_count_max,
80                                         connection_attempts=connection_attempts)
81        return "%s %s" % (base_cmd, self.hostname)
82
83    def _get_server_stack_state(self, lowest_frames=0, highest_frames=None):
84        """ Get the server stack frame status.
85        @param lowest_frames: the lowest frames to start printing.
86        @param highest_frames: the highest frames to print.
87                        (None means no restriction)
88        """
89        stack_frames = inspect.stack()
90        stack = ''
91        for frame in stack_frames[lowest_frames:highest_frames]:
92            function_name = inspect.getframeinfo(frame[0]).function
93            stack = '%s|%s' % (function_name, stack)
94        del stack_frames
95        return stack[:-1] # Delete the last '|' character
96
97    def _verbose_logger_command(self, command):
98        """
99        Prepend the command for the client with information about the ssh
100        command to be executed and the server stack state.
101
102        @param command: the ssh command to be executed.
103        """
104        # The last 3 frames on the stack are boring. Print 6-3=3 stack frames.
105        stack = self._get_server_stack_state(lowest_frames=3, highest_frames=6)
106        # If "logger" executable exists on the DUT use it to respew |command|.
107        # Then regardless of "logger" run |command| as usual.
108        command = ('if type "logger" > /dev/null 2>&1; then'
109                   ' logger -tag "autotest" "server[stack::%s] -> ssh_run(%s)";'
110                   'fi; '
111                   '%s' % (stack, utils.sh_escape(command), command))
112        return command
113
114
115    def _run(self, command, timeout, ignore_status,
116             stdout, stderr, connect_timeout, env, options, stdin, args,
117             ignore_timeout, ssh_failure_retry_ok):
118        """Helper function for run()."""
119        ssh_cmd = self.ssh_command(connect_timeout, options)
120        if not env.strip():
121            env = ""
122        else:
123            env = "export %s;" % env
124        for arg in args:
125            command += ' "%s"' % utils.sh_escape(arg)
126        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))
127
128        # TODO(jrbarnette):  crbug.com/484726 - When we're in an SSP
129        # container, sometimes shortly after reboot we will see DNS
130        # resolution errors on ssh commands; the problem never
131        # occurs more than once in a row.  This especially affects
132        # the autoupdate_Rollback test, but other cases have been
133        # affected, too.
134        #
135        # We work around it by detecting the first DNS resolution error
136        # and retrying exactly one time.
137        dns_error_retry_count = 1
138
139        def counters_inc(counter_name, failure_name):
140            """Helper function to increment metrics counters.
141            @param counter_name: string indicating which counter to use
142            @param failure_name: string indentifying an error, or 'success'
143            """
144            if counter_name == 'call':
145                # ssh_counter records the outcome of each ssh invocation
146                # inside _run(), including exceptions.
147                ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls')
148                fields = {'error' : failure_name or 'success',
149                          'attempt' : ssh_call_count}
150                ssh_counter.increment(fields=fields)
151
152            if counter_name == 'run':
153                # run_counter records each call to _run() with its result
154                # and how many tries were made.  Calls are recorded when
155                # _run() exits (including exiting with an exception)
156                run_counter = metrics.Counter('chromeos/autotest/ssh/runs')
157                fields = {'error' : failure_name or 'success',
158                          'attempt' : ssh_call_count}
159                run_counter.increment(fields=fields)
160
161        # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
162        # error 255: if a simple retry doesn't work, kill the ssh master
163        # connection and try again.  (Note that either error could come from
164        # the command running in the DUT, in which case the retry may be
165        # useless but, in theory, also harmless.)
166        if ssh_failure_retry_ok:
167            # Ignore ssh command timeout, even though it could be a timeout due
168            # to the command executing in the remote host.  Note that passing
169            # ignore_timeout = True makes utils.run() return None on timeouts
170            # (and only on timeouts).
171            original_ignore_timeout = ignore_timeout
172            ignore_timeout = True
173            ssh_failure_retry_count = 2
174        else:
175            ssh_failure_retry_count = 0
176
177        ssh_call_count = 0
178
179        while True:
180            try:
181                # Increment call count first, in case utils.run() throws an
182                # exception.
183                ssh_call_count += 1
184                result = utils.run(full_cmd, timeout, True, stdout, stderr,
185                                   verbose=False, stdin=stdin,
186                                   stderr_is_expected=ignore_status,
187                                   ignore_timeout=ignore_timeout)
188            except Exception as e:
189                # No retries on exception.
190                counters_inc('call', 'exception')
191                counters_inc('run', 'exception')
192                raise e
193
194            failure_name = None
195
196            if result:
197                if result.exit_status == 255:
198                    if re.search(r'^ssh: .*: Name or service not known',
199                                 result.stderr):
200                        failure_name = 'dns_failure'
201                    else:
202                        failure_name = 'error_255'
203                elif result.exit_status > 0:
204                    failure_name = 'nonzero_status'
205            else:
206                # result == None
207                failure_name = 'timeout'
208
209            # Record the outcome of the ssh invocation.
210            counters_inc('call', failure_name)
211
212            if failure_name:
213                # There was a failure: decide whether to retry.
214                if failure_name == 'dns_failure':
215                    if dns_error_retry_count > 0:
216                        logging.debug('retrying ssh because of DNS failure')
217                        dns_error_retry_count -= 1
218                        continue
219                else:
220                    if ssh_failure_retry_count == 2:
221                        logging.debug('retrying ssh command after %s',
222                                       failure_name)
223                        ssh_failure_retry_count -= 1
224                        continue
225                    elif ssh_failure_retry_count == 1:
226                        # After two failures, restart the master connection
227                        # before the final try.
228                        logging.debug('retry 2: restarting master connection')
229                        self.restart_master_ssh()
230                        # Last retry: reinstate timeout behavior.
231                        ignore_timeout = original_ignore_timeout
232                        ssh_failure_retry_count -= 1
233                        continue
234
235            # No retry conditions occurred.  Exit the loop.
236            break
237
238        # The outcomes of ssh invocations have been recorded.  Now record
239        # the outcome of this function.
240
241        if ignore_timeout and not result:
242            counters_inc('run', 'ignored_timeout')
243            return None
244
245        # The error messages will show up in band (indistinguishable
246        # from stuff sent through the SSH connection), so we have the
247        # remote computer echo the message "Connected." before running
248        # any command.  Since the following 2 errors have to do with
249        # connecting, it's safe to do these checks.
250        if result.exit_status == 255:
251            if re.search(r'^ssh: connect to host .* port .*: '
252                         r'Connection timed out\r$', result.stderr):
253                counters_inc('run', 'final_timeout')
254                raise error.AutoservSSHTimeout("ssh timed out", result)
255            if "Permission denied." in result.stderr:
256                msg = "ssh permission denied"
257                counters_inc('run', 'final_eperm')
258                raise error.AutoservSshPermissionDeniedError(msg, result)
259
260        if not ignore_status and result.exit_status > 0:
261            counters_inc('run', 'final_run_error')
262            raise error.AutoservRunError("command execution error", result)
263
264        counters_inc('run', failure_name)
265        return result
266
267
268    def run_very_slowly(self, command, timeout=3600, ignore_status=False,
269            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
270            connect_timeout=30, options='', stdin=None, verbose=True, args=(),
271            ignore_timeout=False, ssh_failure_retry_ok=False):
272        """
273        Run a command on the remote host.
274        This RPC call has an overhead of minimum 40ms and up to 400ms on
275        servers (crbug.com/734887). Each time a run_very_slowly is added for
276        every job - a server core dies in the lab.
277        @see common_lib.hosts.host.run()
278
279        @param timeout: command execution timeout
280        @param connect_timeout: ssh connection timeout (in seconds)
281        @param options: string with additional ssh command options
282        @param verbose: log the commands
283        @param ignore_timeout: bool True if SSH command timeouts should be
284                ignored.  Will return None on command timeout.
285        @param ssh_failure_retry_ok: True if the command may be retried on
286                probable ssh failure (error 255 or timeout).  When true,
287                the command may be executed up to three times, the second
288                time after restarting the ssh master connection.  Use only for
289                commands that are idempotent, because when a "probable
290                ssh failure" occurs, we cannot tell if the command executed
291                or not.
292
293        @raises AutoservRunError: if the command failed
294        @raises AutoservSSHTimeout: ssh connection has timed out
295        """
296        with metrics.SecondsTimer('chromeos/autotest/ssh/master_ssh_time',
297                                  scale=0.001):
298            if verbose:
299                stack = self._get_server_stack_state(lowest_frames=1,
300                                                     highest_frames=7)
301                logging.debug("Running (ssh) '%s' from '%s'", command, stack)
302                command = self._verbose_logger_command(command)
303
304            # Start a master SSH connection if necessary.
305            self.start_master_ssh()
306
307            env = " ".join("=".join(pair) for pair in self.env.iteritems())
308            try:
309                return self._run(command, timeout, ignore_status,
310                                 stdout_tee, stderr_tee, connect_timeout, env,
311                                 options, stdin, args, ignore_timeout,
312                                 ssh_failure_retry_ok)
313            except error.CmdError, cmderr:
314                # We get a CmdError here only if there is timeout of that
315                # command. Catch that and stuff it into AutoservRunError and
316                # raise it.
317                timeout_message = str('Timeout encountered: %s' %
318                                      cmderr.args[0])
319                raise error.AutoservRunError(timeout_message, cmderr.args[1])
320
321
322    def run(self, *args, **kwargs):
323        return self.run_very_slowly(*args, **kwargs)
324
325
326    def run_background(self, command, verbose=True):
327        """Start a command on the host in the background.
328
329        The command is started on the host in the background, and
330        this method call returns immediately without waiting for the
331        command's completion.  The PID of the process on the host is
332        returned as a string.
333
334        The command may redirect its stdin, stdout, or stderr as
335        necessary.  Without redirection, all input and output will
336        use /dev/null.
337
338        @param command The command to run in the background
339        @param verbose As for `self.run()`
340
341        @return Returns the PID of the remote background process
342                as a string.
343        """
344        # Redirection here isn't merely hygienic; it's a functional
345        # requirement.  sshd won't terminate until stdin, stdout,
346        # and stderr are all closed.
347        #
348        # The subshell is needed to do the right thing in case the
349        # passed in command has its own I/O redirections.
350        cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!'
351        return self.run(cmd_fmt % command, verbose=verbose).stdout
352
353
354    def run_short(self, command, **kwargs):
355        """
356        Calls the run() command with a short default timeout.
357
358        Takes the same arguments as does run(),
359        with the exception of the timeout argument which
360        here is fixed at 60 seconds.
361        It returns the result of run.
362
363        @param command: the command line string
364
365        """
366        return self.run(command, timeout=60, **kwargs)
367
368
369    def run_grep(self, command, timeout=30, ignore_status=False,
370                 stdout_ok_regexp=None, stdout_err_regexp=None,
371                 stderr_ok_regexp=None, stderr_err_regexp=None,
372                 connect_timeout=30):
373        """
374        Run a command on the remote host and look for regexp
375        in stdout or stderr to determine if the command was
376        successul or not.
377
378
379        @param command: the command line string
380        @param timeout: time limit in seconds before attempting to
381                        kill the running process. The run() function
382                        will take a few seconds longer than 'timeout'
383                        to complete if it has to kill the process.
384        @param ignore_status: do not raise an exception, no matter
385                              what the exit code of the command is.
386        @param stdout_ok_regexp: regexp that should be in stdout
387                                 if the command was successul.
388        @param stdout_err_regexp: regexp that should be in stdout
389                                  if the command failed.
390        @param stderr_ok_regexp: regexp that should be in stderr
391                                 if the command was successul.
392        @param stderr_err_regexp: regexp that should be in stderr
393                                 if the command failed.
394        @param connect_timeout: connection timeout (in seconds)
395
396        Returns:
397                if the command was successul, raises an exception
398                otherwise.
399
400        Raises:
401                AutoservRunError:
402                - the exit code of the command execution was not 0.
403                - If stderr_err_regexp is found in stderr,
404                - If stdout_err_regexp is found in stdout,
405                - If stderr_ok_regexp is not found in stderr.
406                - If stdout_ok_regexp is not found in stdout,
407        """
408
409        # We ignore the status, because we will handle it at the end.
410        result = self.run(command, timeout, ignore_status=True,
411                          connect_timeout=connect_timeout)
412
413        # Look for the patterns, in order
414        for (regexp, stream) in ((stderr_err_regexp, result.stderr),
415                                 (stdout_err_regexp, result.stdout)):
416            if regexp and stream:
417                err_re = re.compile (regexp)
418                if err_re.search(stream):
419                    raise error.AutoservRunError(
420                        '%s failed, found error pattern: "%s"' % (command,
421                                                                regexp), result)
422
423        for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
424                                 (stdout_ok_regexp, result.stdout)):
425            if regexp and stream:
426                ok_re = re.compile (regexp)
427                if ok_re.search(stream):
428                    if ok_re.search(stream):
429                        return
430
431        if not ignore_status and result.exit_status > 0:
432            raise error.AutoservRunError("command execution error", result)
433
434
435    def setup_ssh_key(self):
436        """Setup SSH Key"""
437        logging.debug('Performing SSH key setup on %s as %s.',
438                      self.host_port, self.user)
439
440        try:
441            host = pxssh.pxssh()
442            host.login(self.hostname, self.user, self.password,
443                        port=self.port)
444            public_key = utils.get_public_key()
445
446            host.sendline('mkdir -p ~/.ssh')
447            host.prompt()
448            host.sendline('chmod 700 ~/.ssh')
449            host.prompt()
450            host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " %
451                            public_key)
452            host.prompt()
453            host.sendline('chmod 600 ~/.ssh/authorized_keys')
454            host.prompt()
455            host.logout()
456
457            logging.debug('SSH key setup complete.')
458
459        except:
460            logging.debug('SSH key setup has failed.')
461            try:
462                host.logout()
463            except:
464                pass
465
466
467    def setup_ssh(self):
468        """Setup SSH"""
469        if self.password:
470            try:
471                self.ssh_ping()
472            except error.AutoservSshPingHostError:
473                self.setup_ssh_key()
474