• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#
2# Copyright 2007 Google Inc. Released under the GPL v2
3
4"""
5This module defines the SSHHost class.
6
7Implementation details:
8You should import the "hosts" package instead of importing each type of host.
9
10        SSHHost: a remote machine with a ssh access
11"""
12
13import inspect
14import logging
15import re
16import warnings
17from autotest_lib.client.common_lib import error
18from autotest_lib.client.common_lib import pxssh
19from autotest_lib.server import utils
20from autotest_lib.server.hosts import abstract_ssh
21
# In case cros_host is being run via SSP on an older Moblab version with an
# older chromite version.
24try:
25    from chromite.lib import metrics
26except ImportError:
27    metrics = utils.metrics_mock
28
29
30class SSHHost(abstract_ssh.AbstractSSHHost):
31    """
32    This class represents a remote machine controlled through an ssh
33    session on which you can run programs.
34
35    It is not the machine autoserv is running on. The machine must be
36    configured for password-less login, for example through public key
37    authentication.
38
39    It includes support for controlling the machine through a serial
40    console on which you can run programs. If such a serial console is
41    set up on the machine then capabilities such as hard reset and
42    boot strap monitoring are available. If the machine does not have a
43    serial console available then ordinary SSH-based commands will
44    still be available, but attempts to use extensions such as
45    console logging or hard reset will fail silently.
46
47    Implementation details:
48    This is a leaf class in an abstract class hierarchy, it must
49    implement the unimplemented methods in parent classes.
50    """
51
    def _initialize(self, hostname, *args, **dargs):
        """
        Construct a SSHHost object.

        Delegates to the parent class's _initialize() and then calls
        setup_ssh() on this object.

        @param hostname: network hostname or address of remote machine
        @param args: extra positional arguments forwarded to the parent
        @param dargs: extra keyword arguments forwarded to the parent
        """
        # NOTE(review): hostname is forwarded by keyword while *args is
        # forwarded positionally.  Positional arguments are bound first, so
        # a non-empty *args may collide with hostname in the parent's
        # signature -- confirm against AbstractSSHHost._initialize().
        super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs)
        self.setup_ssh()
61
62
63    def ssh_command(self, connect_timeout=30, options='', alive_interval=300):
64        """
65        Construct an ssh command with proper args for this host.
66
67        @param connect_timeout: connection timeout (in seconds)
68        @param options: SSH options
69        @param alive_interval: SSH Alive interval.
70        """
71        options = "%s %s" % (options, self._master_ssh.ssh_option)
72        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
73                                         opts=options,
74                                         hosts_file=self.known_hosts_file,
75                                         connect_timeout=connect_timeout,
76                                         alive_interval=alive_interval)
77        return "%s %s" % (base_cmd, self.hostname)
78
79    def _get_server_stack_state(self, lowest_frames=0, highest_frames=None):
80        """ Get the server stack frame status.
81        @param lowest_frames: the lowest frames to start printing.
82        @param highest_frames: the highest frames to print.
83                        (None means no restriction)
84        """
85        stack_frames = inspect.stack()
86        stack = ''
87        for frame in stack_frames[lowest_frames:highest_frames]:
88            function_name = inspect.getframeinfo(frame[0]).function
89            stack = '%s|%s' % (function_name, stack)
90        del stack_frames
91        return stack[:-1] # Delete the last '|' character
92
93    def _verbose_logger_command(self, command):
94        """
95        Prepend the command for the client with information about the ssh command
96        to be executed and the server stack state.
97
98        @param command: the ssh command to be executed.
99        """
100        # The last 3 frames on the stack are boring. Print 6-3=3 stack frames.
101        stack = self._get_server_stack_state(lowest_frames=3, highest_frames=6)
102        # If "logger" executable exists on the DUT use it to respew |command|.
103        # Then regardless of "logger" run |command| as usual.
104        command = ('if type "logger" > /dev/null 2>&1; then'
105                   ' logger -tag "autotest" "server[stack::%s] -> ssh_run(%s)";'
106                   'fi; '
107                   '%s' % (stack, utils.sh_escape(command), command))
108        return command
109
110
111    def _run(self, command, timeout, ignore_status,
112             stdout, stderr, connect_timeout, env, options, stdin, args,
113             ignore_timeout, ssh_failure_retry_ok):
114        """Helper function for run()."""
115        ssh_cmd = self.ssh_command(connect_timeout, options)
116        if not env.strip():
117            env = ""
118        else:
119            env = "export %s;" % env
120        for arg in args:
121            command += ' "%s"' % utils.sh_escape(arg)
122        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))
123
124        # TODO(jrbarnette):  crbug.com/484726 - When we're in an SSP
125        # container, sometimes shortly after reboot we will see DNS
126        # resolution errors on ssh commands; the problem never
127        # occurs more than once in a row.  This especially affects
128        # the autoupdate_Rollback test, but other cases have been
129        # affected, too.
130        #
131        # We work around it by detecting the first DNS resolution error
132        # and retrying exactly one time.
133        dns_error_retry_count = 1
134
135        def counters_inc(counter_name, failure_name):
136            """Helper function to increment metrics counters.
137            @param counter_name: string indicating which counter to use
138            @param failure_name: string indentifying an error, or 'success'
139            """
140            if counter_name == 'call':
141                # ssh_counter records the outcome of each ssh invocation
142                # inside _run(), including exceptions.
143                ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls')
144                fields = {'error' : failure_name or 'success',
145                          'attempt' : ssh_call_count}
146                ssh_counter.increment(fields=fields)
147
148            if counter_name == 'run':
149                # run_counter records each call to _run() with its result
150                # and how many tries were made.  Calls are recorded when
151                # _run() exits (including exiting with an exception)
152                run_counter = metrics.Counter('chromeos/autotest/ssh/runs')
153                fields = {'error' : failure_name or 'success',
154                          'attempt' : ssh_call_count}
155                run_counter.increment(fields=fields)
156
157        # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
158        # error 255: if a simple retry doesn't work, kill the ssh master
159        # connection and try again.  (Note that either error could come from
160        # the command running in the DUT, in which case the retry may be
161        # useless but, in theory, also harmless.)
162        if ssh_failure_retry_ok:
163            # Ignore ssh command timeout, even though it could be a timeout due
164            # to the command executing in the remote host.  Note that passing
165            # ignore_timeout = True makes utils.run() return None on timeouts
166            # (and only on timeouts).
167            original_ignore_timeout = ignore_timeout
168            ignore_timeout = True
169            ssh_failure_retry_count = 2
170        else:
171            ssh_failure_retry_count = 0
172
173        ssh_call_count = 0
174
175        while True:
176            try:
177                # Increment call count first, in case utils.run() throws an
178                # exception.
179                ssh_call_count += 1
180                result = utils.run(full_cmd, timeout, True, stdout, stderr,
181                                   verbose=False, stdin=stdin,
182                                   stderr_is_expected=ignore_status,
183                                   ignore_timeout=ignore_timeout)
184            except Exception as e:
185                # No retries on exception.
186                counters_inc('call', 'exception')
187                counters_inc('run', 'exception')
188                raise e
189
190            failure_name = None
191
192            if result:
193                if result.exit_status == 255:
194                    if re.search(r'^ssh: .*: Name or service not known',
195                                 result.stderr):
196                        failure_name = 'dns_failure'
197                    else:
198                        failure_name = 'error_255'
199                elif result.exit_status > 0:
200                    failure_name = 'nonzero_status'
201            else:
202                # result == None
203                failure_name = 'timeout'
204
205            # Record the outcome of the ssh invocation.
206            counters_inc('call', failure_name)
207
208            if failure_name:
209                # There was a failure: decide whether to retry.
210                if failure_name == 'dns_failure':
211                    if dns_error_retry_count > 0:
212                        logging.debug('retrying ssh because of DNS failure')
213                        dns_error_retry_count -= 1
214                        continue
215                else:
216                    if ssh_failure_retry_count == 2:
217                        logging.debug('retrying ssh command after %s',
218                                       failure_name)
219                        ssh_failure_retry_count -= 1
220                        continue
221                    elif ssh_failure_retry_count == 1:
222                        # After two failures, restart the master connection
223                        # before the final try.
224                        logging.debug('retry 2: restarting master connection')
225                        self.restart_master_ssh()
226                        # Last retry: reinstate timeout behavior.
227                        ignore_timeout = original_ignore_timeout
228                        ssh_failure_retry_count -= 1
229                        continue
230
231            # No retry conditions occurred.  Exit the loop.
232            break
233
234        # The outcomes of ssh invocations have been recorded.  Now record
235        # the outcome of this function.
236
237        if ignore_timeout and not result:
238            counters_inc('run', 'ignored_timeout')
239            return None
240
241        # The error messages will show up in band (indistinguishable
242        # from stuff sent through the SSH connection), so we have the
243        # remote computer echo the message "Connected." before running
244        # any command.  Since the following 2 errors have to do with
245        # connecting, it's safe to do these checks.
246        if result.exit_status == 255:
247            if re.search(r'^ssh: connect to host .* port .*: '
248                         r'Connection timed out\r$', result.stderr):
249                counters_inc('run', 'final_timeout')
250                raise error.AutoservSSHTimeout("ssh timed out", result)
251            if "Permission denied." in result.stderr:
252                msg = "ssh permission denied"
253                counters_inc('run', 'final_eperm')
254                raise error.AutoservSshPermissionDeniedError(msg, result)
255
256        if not ignore_status and result.exit_status > 0:
257            counters_inc('run', 'final_run_error')
258            raise error.AutoservRunError("command execution error", result)
259
260        counters_inc('run', failure_name)
261        return result
262
263
264    @metrics.SecondsTimerDecorator(
265            'chromeos/autotest/ssh/master_ssh_time')
266    def run_very_slowly(self, command, timeout=3600, ignore_status=False,
267            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
268            connect_timeout=30, options='', stdin=None, verbose=True, args=(),
269            ignore_timeout=False, ssh_failure_retry_ok=False):
270        """
271        Run a command on the remote host.
272        This RPC call has an overhead of minimum 40ms and up to 400ms on
273        servers (crbug.com/734887). Each time a run_very_slowly is added for
274        every job - a server core dies in the lab.
275        @see common_lib.hosts.host.run()
276
277        @param timeout: command execution timeout
278        @param connect_timeout: ssh connection timeout (in seconds)
279        @param options: string with additional ssh command options
280        @param verbose: log the commands
281        @param ignore_timeout: bool True if SSH command timeouts should be
282                ignored.  Will return None on command timeout.
283        @param ssh_failure_retry_ok: True if the command may be retried on
284                probable ssh failure (error 255 or timeout).  When true,
285                the command may be executed up to three times, the second
286                time after restarting the ssh master connection.  Use only for
287                commands that are idempotent, because when a "probable
288                ssh failure" occurs, we cannot tell if the command executed
289                or not.
290
291        @raises AutoservRunError: if the command failed
292        @raises AutoservSSHTimeout: ssh connection has timed out
293        """
294        if verbose:
295            stack = self._get_server_stack_state(lowest_frames=1, highest_frames=7)
296            logging.debug("Running (ssh) '%s' from '%s'", command, stack)
297            command = self._verbose_logger_command(command)
298
299        # Start a master SSH connection if necessary.
300        self.start_master_ssh()
301
302        env = " ".join("=".join(pair) for pair in self.env.iteritems())
303        try:
304            return self._run(command, timeout, ignore_status,
305                             stdout_tee, stderr_tee, connect_timeout, env,
306                             options, stdin, args, ignore_timeout,
307                             ssh_failure_retry_ok)
308        except error.CmdError, cmderr:
309            # We get a CmdError here only if there is timeout of that command.
310            # Catch that and stuff it into AutoservRunError and raise it.
311            timeout_message = str('Timeout encountered: %s' % cmderr.args[0])
312            raise error.AutoservRunError(timeout_message, cmderr.args[1])
313
314
315    def run(self, *args, **kwargs):
316        return self.run_very_slowly(*args, **kwargs)
317
318
319    def run_background(self, command, verbose=True):
320        """Start a command on the host in the background.
321
322        The command is started on the host in the background, and
323        this method call returns immediately without waiting for the
324        command's completion.  The PID of the process on the host is
325        returned as a string.
326
327        The command may redirect its stdin, stdout, or stderr as
328        necessary.  Without redirection, all input and output will
329        use /dev/null.
330
331        @param command The command to run in the background
332        @param verbose As for `self.run()`
333
334        @return Returns the PID of the remote background process
335                as a string.
336        """
337        # Redirection here isn't merely hygienic; it's a functional
338        # requirement.  sshd won't terminate until stdin, stdout,
339        # and stderr are all closed.
340        #
341        # The subshell is needed to do the right thing in case the
342        # passed in command has its own I/O redirections.
343        cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!'
344        return self.run(cmd_fmt % command, verbose=verbose).stdout
345
346
347    def run_short(self, command, **kwargs):
348        """
349        Calls the run() command with a short default timeout.
350
351        Takes the same arguments as does run(),
352        with the exception of the timeout argument which
353        here is fixed at 60 seconds.
354        It returns the result of run.
355
356        @param command: the command line string
357
358        """
359        return self.run(command, timeout=60, **kwargs)
360
361
362    def run_grep(self, command, timeout=30, ignore_status=False,
363                 stdout_ok_regexp=None, stdout_err_regexp=None,
364                 stderr_ok_regexp=None, stderr_err_regexp=None,
365                 connect_timeout=30):
366        """
367        Run a command on the remote host and look for regexp
368        in stdout or stderr to determine if the command was
369        successul or not.
370
371
372        @param command: the command line string
373        @param timeout: time limit in seconds before attempting to
374                        kill the running process. The run() function
375                        will take a few seconds longer than 'timeout'
376                        to complete if it has to kill the process.
377        @param ignore_status: do not raise an exception, no matter
378                              what the exit code of the command is.
379        @param stdout_ok_regexp: regexp that should be in stdout
380                                 if the command was successul.
381        @param stdout_err_regexp: regexp that should be in stdout
382                                  if the command failed.
383        @param stderr_ok_regexp: regexp that should be in stderr
384                                 if the command was successul.
385        @param stderr_err_regexp: regexp that should be in stderr
386                                 if the command failed.
387        @param connect_timeout: connection timeout (in seconds)
388
389        Returns:
390                if the command was successul, raises an exception
391                otherwise.
392
393        Raises:
394                AutoservRunError:
395                - the exit code of the command execution was not 0.
396                - If stderr_err_regexp is found in stderr,
397                - If stdout_err_regexp is found in stdout,
398                - If stderr_ok_regexp is not found in stderr.
399                - If stdout_ok_regexp is not found in stdout,
400        """
401
402        # We ignore the status, because we will handle it at the end.
403        result = self.run(command, timeout, ignore_status=True,
404                          connect_timeout=connect_timeout)
405
406        # Look for the patterns, in order
407        for (regexp, stream) in ((stderr_err_regexp, result.stderr),
408                                 (stdout_err_regexp, result.stdout)):
409            if regexp and stream:
410                err_re = re.compile (regexp)
411                if err_re.search(stream):
412                    raise error.AutoservRunError(
413                        '%s failed, found error pattern: "%s"' % (command,
414                                                                regexp), result)
415
416        for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
417                                 (stdout_ok_regexp, result.stdout)):
418            if regexp and stream:
419                ok_re = re.compile (regexp)
420                if ok_re.search(stream):
421                    if ok_re.search(stream):
422                        return
423
424        if not ignore_status and result.exit_status > 0:
425            raise error.AutoservRunError("command execution error", result)
426
427
428    def setup_ssh_key(self):
429        """Setup SSH Key"""
430        logging.debug('Performing SSH key setup on %s:%d as %s.',
431                      self.hostname, self.port, self.user)
432
433        try:
434            host = pxssh.pxssh()
435            host.login(self.hostname, self.user, self.password,
436                        port=self.port)
437            public_key = utils.get_public_key()
438
439            host.sendline('mkdir -p ~/.ssh')
440            host.prompt()
441            host.sendline('chmod 700 ~/.ssh')
442            host.prompt()
443            host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " %
444                            public_key)
445            host.prompt()
446            host.sendline('chmod 600 ~/.ssh/authorized_keys')
447            host.prompt()
448            host.logout()
449
450            logging.debug('SSH key setup complete.')
451
452        except:
453            logging.debug('SSH key setup has failed.')
454            try:
455                host.logout()
456            except:
457                pass
458
459
460    def setup_ssh(self):
461        """Setup SSH"""
462        if self.password:
463            try:
464                self.ssh_ping()
465            except error.AutoservSshPingHostError:
466                self.setup_ssh_key()
467