1# Lint as: python2, python3 2# 3# Copyright 2007 Google Inc. Released under the GPL v2 4 5""" 6This module defines the SSHHost class. 7 8Implementation details: 9You should import the "hosts" package instead of importing each type of host. 10 11 SSHHost: a remote machine with a ssh access 12""" 13 14from __future__ import absolute_import 15from __future__ import division 16from __future__ import print_function 17 18import inspect 19import logging 20import re 21import time 22 23import common 24from autotest_lib.client.common_lib import error 25from autotest_lib.client.common_lib import pxssh 26from autotest_lib.server import utils 27from autotest_lib.server.hosts import abstract_ssh 28import six 29 30# In case cros_host is being ran via SSP on an older Moblab version with an 31# older chromite version. 32try: 33 from autotest_lib.utils.frozen_chromite.lib import metrics 34except ImportError: 35 metrics = utils.metrics_mock 36 37 38def THIS_IS_SLOW(func): 39 """Mark the given function as slow, when looking at calls to it""" 40 func.__name__ = '%s__SLOW__' % func.__name__ 41 return func 42 43 44class SSHHost(abstract_ssh.AbstractSSHHost): 45 """ 46 This class represents a remote machine controlled through an ssh 47 session on which you can run programs. 48 49 It is not the machine autoserv is running on. The machine must be 50 configured for password-less login, for example through public key 51 authentication. 52 53 It includes support for controlling the machine through a serial 54 console on which you can run programs. If such a serial console is 55 set up on the machine then capabilities such as hard reset and 56 boot strap monitoring are available. If the machine does not have a 57 serial console available then ordinary SSH-based commands will 58 still be available, but attempts to use extensions such as 59 console logging or hard reset will fail silently. 60 61 Implementation details: 62 This is a leaf class in an abstract class hierarchy, it must 63 implement the unimplemented methods in parent classes. 64 """ 65 RUN_TIMEOUT = 3600 66 67 def _initialize(self, hostname, *args, **dargs): 68 """ 69 Construct a SSHHost object 70 71 Args: 72 hostname: network hostname or address of remote machine 73 """ 74 super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs) 75 self._default_run_timeout = self.RUN_TIMEOUT 76 self.setup_ssh() 77 78 79 def ssh_command(self, connect_timeout=30, options='', alive_interval=300, 80 alive_count_max=3, connection_attempts=1): 81 """ 82 Construct an ssh command with proper args for this host. 83 84 @param connect_timeout: connection timeout (in seconds) 85 @param options: SSH options 86 @param alive_interval: SSH Alive interval. 87 @param alive_count_max: SSH AliveCountMax. 88 @param connection_attempts: SSH ConnectionAttempts 89 """ 90 options = " ".join([options, self._main_ssh.ssh_option]) 91 base_cmd = self.make_ssh_command(user=self.user, port=self.port, 92 opts=options, 93 hosts_file=self.known_hosts_file, 94 connect_timeout=connect_timeout, 95 alive_interval=alive_interval, 96 alive_count_max=alive_count_max, 97 connection_attempts=connection_attempts) 98 return "%s %s" % (base_cmd, self.hostname) 99 100 def _get_server_stack_state(self, lowest_frames=0, highest_frames=None): 101 """ Get the server stack frame status. 102 @param lowest_frames: the lowest frames to start printing. 103 @param highest_frames: the highest frames to print. 104 (None means no restriction) 105 """ 106 stack_frames = inspect.stack() 107 stack = '' 108 for frame in stack_frames[lowest_frames:highest_frames]: 109 function_name = inspect.getframeinfo(frame[0]).function 110 stack = '%s|%s' % (function_name, stack) 111 del stack_frames 112 return stack[:-1] # Delete the last '|' character 113 114 def _verbose_logger_command(self, command): 115 """ 116 Prepend the command for the client with information about the ssh 117 command to be executed and the server stack state. 118 119 @param command: the ssh command to be executed. 120 """ 121 # The last few frames on the stack are not useful, so skip them. 122 stack = self._get_server_stack_state(lowest_frames=3, highest_frames=6) 123 # If logger executable exists on the DUT, use it to report the command. 124 # Then regardless of logger, run the command as usual. 125 command = ('test -x /usr/bin/logger && /usr/bin/logger' 126 ' -t autotest "from [%s] ssh_run: %s"; %s' 127 % (stack, utils.sh_escape(command), command)) 128 return command 129 130 def _tls_run(self, original_cmd, timeout, ignore_status, stdout, stderr, 131 args, ignore_timeout): 132 """Helper function for run(), uses the tls client.""" 133 if not self.tls_connection.alive: 134 raise error.TLSConnectionError("TLS not connected.") 135 original_cmd = ' '.join([original_cmd] + 136 [utils.sh_quote_word(arg) for arg in args]) 137 138 try: 139 result = self.tls_exec_dut_command_client.run_cmd(original_cmd, timeout, 140 stdout, stderr, 141 ignore_timeout) 142 except Exception as e: 143 logging.warning("TLS Client run err %s", e) 144 raise e 145 146 if not ignore_status and result.exit_status > 0: 147 msg = result.stderr.strip() 148 if not msg: 149 msg = result.stdout.strip() 150 if msg: 151 msg = msg.splitlines()[-1] 152 raise error.AutoservRunError( 153 "command execution error using TLS (%d): %s" % 154 (result.exit_status, msg), result) 155 156 return result 157 158 def _run(self, command, timeout, ignore_status, stdout, stderr, 159 connect_timeout, env, options, stdin, args, ignore_timeout, 160 ssh_failure_retry_ok, verbose): 161 """Helper function for run().""" 162 if connect_timeout > timeout: 163 # timeout passed from run() may be smaller than 1, because we 164 # subtract the elapsed time from the original timeout supplied. 165 connect_timeout = max(int(timeout), 1) 166 original_cmd = command 167 168 # If TLS client has been built, and not marked as unstable, use it. 169 # NOTE: if the tls_enabled setting in the config is not True, the 170 # client will not have been built. 171 use_tls = self.tls_exec_dut_command_client and not self.tls_unstable 172 173 if verbose: 174 stack = self._get_server_stack_state(lowest_frames=2, 175 highest_frames=8) 176 177 logging.debug("Running (via %s) '%s' from '%s'", 178 'TLS' if use_tls else 'SSH', command, stack) 179 command = self._verbose_logger_command(command) 180 181 if use_tls: 182 try: 183 return self._tls_run(command, timeout, ignore_status, stdout, 184 stderr, args, ignore_timeout) 185 except (error.AutoservRunError, error.CmdTimeoutError) as e: 186 raise e 187 except Exception as e: 188 # If TLS fails for unknown reason, we will revert to normal ssh. 189 logging.warning( 190 "Unexpected TLS cmd failed. Reverting to SSH.\n %s", e) 191 192 # Note the TLS as unstable so we do not attempt to re-start it. 193 self.tls_unstable = True 194 195 ssh_cmd = self.ssh_command(connect_timeout, options) 196 if not env.strip(): 197 env = "" 198 else: 199 env = "export %s;" % env 200 for arg in args: 201 command += ' "%s"' % utils.sh_escape(arg) 202 full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command)) 203 204 def counters_inc(counter_name, failure_name): 205 """Helper function to increment metrics counters. 206 @param counter_name: string indicating which counter to use 207 @param failure_name: string indentifying an error, or 'success' 208 """ 209 if counter_name == 'call': 210 # ssh_counter records the outcome of each ssh invocation 211 # inside _run(), including exceptions. 212 ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls') 213 fields = {'error' : failure_name or 'success', 214 'attempt' : ssh_call_count} 215 ssh_counter.increment(fields=fields) 216 217 if counter_name == 'run': 218 # run_counter records each call to _run() with its result 219 # and how many tries were made. Calls are recorded when 220 # _run() exits (including exiting with an exception) 221 run_counter = metrics.Counter('chromeos/autotest/ssh/runs') 222 fields = {'error' : failure_name or 'success', 223 'attempt' : ssh_call_count} 224 run_counter.increment(fields=fields) 225 226 # If ssh_failure_retry_ok is True, retry twice on timeouts and generic 227 # error 255: if a simple retry doesn't work, kill the ssh main 228 # connection and try again. (Note that either error could come from 229 # the command running in the DUT, in which case the retry may be 230 # useless but, in theory, also harmless.) 231 if ssh_failure_retry_ok: 232 # Ignore ssh command timeout, even though it could be a timeout due 233 # to the command executing in the remote host. Note that passing 234 # ignore_timeout = True makes utils.run() return None on timeouts 235 # (and only on timeouts). 236 original_ignore_timeout = ignore_timeout 237 ignore_timeout = True 238 ssh_failure_retry_count = 2 239 else: 240 ssh_failure_retry_count = 0 241 242 ssh_call_count = 0 243 244 while True: 245 try: 246 # Increment call count first, in case utils.run() throws an 247 # exception. 248 ssh_call_count += 1 249 result = utils.run(full_cmd, timeout, True, stdout, stderr, 250 verbose=False, stdin=stdin, 251 stderr_is_expected=ignore_status, 252 ignore_timeout=ignore_timeout) 253 except Exception as e: 254 # No retries on exception. 255 counters_inc('call', 'exception') 256 counters_inc('run', 'exception') 257 raise e 258 259 failure_name = None 260 261 if result: 262 if result.exit_status == 255: 263 if re.search(r'^ssh: .*: Name or service not known', 264 result.stderr): 265 failure_name = 'dns_failure' 266 else: 267 failure_name = 'error_255' 268 elif result.exit_status > 0: 269 failure_name = 'nonzero_status' 270 else: 271 # result == None 272 failure_name = 'timeout' 273 274 # Record the outcome of the ssh invocation. 275 counters_inc('call', failure_name) 276 277 if failure_name: 278 # There was a failure: decide whether to retry. 279 if failure_name == 'dns_failure': 280 raise error.AutoservSshDnsError("DNS Failure: ", result) 281 else: 282 if ssh_failure_retry_count == 2: 283 logging.debug('retrying ssh command after %s', 284 failure_name) 285 ssh_failure_retry_count -= 1 286 continue 287 elif ssh_failure_retry_count == 1: 288 # After two failures, restart the main connection 289 # before the final try. 290 stack = self._get_server_stack_state(lowest_frames=1, 291 highest_frames=7) 292 logging.debug( 293 'retry 2: restarting main connection from \'%s\'', 294 stack) 295 self.restart_main_ssh() 296 # Last retry: reinstate timeout behavior. 297 ignore_timeout = original_ignore_timeout 298 ssh_failure_retry_count -= 1 299 continue 300 301 # No retry conditions occurred. Exit the loop. 302 break 303 304 # The outcomes of ssh invocations have been recorded. Now record 305 # the outcome of this function. 306 307 if ignore_timeout and not result: 308 counters_inc('run', 'ignored_timeout') 309 return None 310 311 # The error messages will show up in band (indistinguishable 312 # from stuff sent through the SSH connection), so we have the 313 # remote computer echo the message "Connected." before running 314 # any command. Since the following 2 errors have to do with 315 # connecting, it's safe to do these checks. 316 if result.exit_status == 255: 317 if re.search(r'^ssh: connect to host .* port .*: ' 318 r'Connection timed out\r$', result.stderr): 319 counters_inc('run', 'final_timeout') 320 raise error.AutoservSSHTimeout( 321 "ssh timed out: %r" % original_cmd.strip(), result) 322 if "Permission denied." in result.stderr: 323 msg = "ssh permission denied" 324 counters_inc('run', 'final_eperm') 325 raise error.AutoservSshPermissionDeniedError(msg, result) 326 327 if not ignore_status and result.exit_status > 0: 328 counters_inc('run', 'final_run_error') 329 msg = result.stderr.strip() 330 if not msg: 331 msg = result.stdout.strip() 332 if msg: 333 msg = msg.splitlines()[-1] 334 raise error.AutoservRunError("command execution error (%d): %r" % 335 (result.exit_status, msg), result) 336 337 counters_inc('run', failure_name) 338 return result 339 340 def set_default_run_timeout(self, timeout): 341 """Set the default timeout for run.""" 342 if timeout < 0: 343 raise error.TestError('Invalid timeout %d', timeout) 344 self._default_run_timeout = timeout 345 346 @THIS_IS_SLOW 347 def run(self, command, timeout=None, ignore_status=False, 348 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS, 349 connect_timeout=30, options='', stdin=None, verbose=True, args=(), 350 ignore_timeout=False, ssh_failure_retry_ok=False): 351 """ 352 Run a command on the remote host. 353 @note: This RPC call has an overhead of minimum 40ms and up to 400ms on 354 servers (crbug.com/734887). Each time a call is added for 355 every job, a server core dies in the lab. 356 @see: common_lib.hosts.host.run() 357 358 @param timeout: command execution timeout in seconds. Default is 359 _default_run_timeout (1 hour). 360 @param connect_timeout: ssh connection timeout (in seconds) 361 @param options: string with additional ssh command options 362 @param verbose: log the commands 363 @param ignore_timeout: bool True if SSH command timeouts should be 364 ignored. Will return None on command timeout. 365 @param ssh_failure_retry_ok: True if the command may be retried on 366 probable ssh failure (error 255 or timeout). When true, 367 the command may be executed up to three times, the second 368 time after restarting the ssh main connection. Use only for 369 commands that are idempotent, because when a "probable 370 ssh failure" occurs, we cannot tell if the command executed 371 or not. 372 373 @raises AutoservRunError: if the command failed 374 @raises AutoservSSHTimeout: ssh connection has timed out 375 """ 376 # For example if the command is a list, we need to convert it to a 377 # string first. 378 if not isinstance(command, six.string_types): 379 command = ' '.join(command) 380 381 if timeout is None: 382 timeout = self._default_run_timeout 383 start_time = time.time() 384 with metrics.SecondsTimer('chromeos/autotest/ssh/main_ssh_time', 385 scale=0.001): 386 387 self.start_main_ssh(min( 388 timeout, 389 self.DEFAULT_START_MAIN_SSH_TIMEOUT_S, 390 )) 391 392 env = " ".join("=".join(pair) for pair in six.iteritems(self.env)) 393 elapsed = time.time() - start_time 394 try: 395 return self._run(command, timeout - elapsed, ignore_status, 396 stdout_tee, stderr_tee, connect_timeout, env, 397 options, stdin, args, ignore_timeout, 398 ssh_failure_retry_ok, verbose) 399 except error.CmdError as cmderr: 400 # We get a CmdError here only if there is timeout of that 401 # command. Catch that and stuff it into AutoservRunError and 402 # raise it. 403 timeout_message = str('Timeout encountered: %s' % 404 cmderr.args[0]) 405 raise error.AutoservRunError(timeout_message, cmderr.args[1]) 406 407 408 def run_background(self, command, verbose=True): 409 """Start a command on the host in the background. 410 411 The command is started on the host in the background, and 412 this method call returns immediately without waiting for the 413 command's completion. The PID of the process on the host is 414 returned as a string. 415 416 The command may redirect its stdin, stdout, or stderr as 417 necessary. Without redirection, all input and output will 418 use /dev/null. 419 420 @param command The command to run in the background 421 @param verbose As for `self.run()` 422 423 @return Returns the PID of the remote background process 424 as a string. 425 """ 426 # Redirection here isn't merely hygienic; it's a functional 427 # requirement. sshd won't terminate until stdin, stdout, 428 # and stderr are all closed. 429 # 430 # The subshell is needed to do the right thing in case the 431 # passed in command has its own I/O redirections. 432 cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!' 433 return self.run(cmd_fmt % command, verbose=verbose).stdout 434 435 436 def run_short(self, command, **kwargs): 437 """ 438 Calls the run() command with a short default timeout. 439 440 Takes the same arguments as does run(), 441 with the exception of the timeout argument which 442 here is fixed at 60 seconds. 443 It returns the result of run. 444 445 @param command: the command line string 446 447 """ 448 return self.run(command, timeout=60, **kwargs) 449 450 451 def run_grep(self, command, timeout=30, ignore_status=False, 452 stdout_ok_regexp=None, stdout_err_regexp=None, 453 stderr_ok_regexp=None, stderr_err_regexp=None, 454 connect_timeout=30): 455 """ 456 Run a command on the remote host and look for regexp 457 in stdout or stderr to determine if the command was 458 successul or not. 459 460 461 @param command: the command line string 462 @param timeout: time limit in seconds before attempting to 463 kill the running process. The run() function 464 will take a few seconds longer than 'timeout' 465 to complete if it has to kill the process. 466 @param ignore_status: do not raise an exception, no matter 467 what the exit code of the command is. 468 @param stdout_ok_regexp: regexp that should be in stdout 469 if the command was successul. 470 @param stdout_err_regexp: regexp that should be in stdout 471 if the command failed. 472 @param stderr_ok_regexp: regexp that should be in stderr 473 if the command was successul. 474 @param stderr_err_regexp: regexp that should be in stderr 475 if the command failed. 476 @param connect_timeout: connection timeout (in seconds) 477 478 Returns: 479 if the command was successul, raises an exception 480 otherwise. 481 482 Raises: 483 AutoservRunError: 484 - the exit code of the command execution was not 0. 485 - If stderr_err_regexp is found in stderr, 486 - If stdout_err_regexp is found in stdout, 487 - If stderr_ok_regexp is not found in stderr. 488 - If stdout_ok_regexp is not found in stdout, 489 """ 490 491 # We ignore the status, because we will handle it at the end. 492 result = self.run(command, timeout, ignore_status=True, 493 connect_timeout=connect_timeout) 494 495 # Look for the patterns, in order 496 for (regexp, stream) in ((stderr_err_regexp, result.stderr), 497 (stdout_err_regexp, result.stdout)): 498 if regexp and stream: 499 err_re = re.compile (regexp) 500 if err_re.search(stream): 501 raise error.AutoservRunError( 502 '%r failed, found error pattern: %r' % (command, 503 regexp), result) 504 505 for (regexp, stream) in ((stderr_ok_regexp, result.stderr), 506 (stdout_ok_regexp, result.stdout)): 507 if regexp and stream: 508 ok_re = re.compile (regexp) 509 if ok_re.search(stream): 510 if ok_re.search(stream): 511 return 512 513 if not ignore_status and result.exit_status > 0: 514 msg = result.stderr.strip() 515 if not msg: 516 msg = result.stdout.strip() 517 if msg: 518 msg = msg.splitlines()[-1] 519 raise error.AutoservRunError("command execution error (%d): %r" % 520 (result.exit_status, msg), result) 521 522 523 def setup_ssh_key(self): 524 """Setup SSH Key""" 525 logging.debug('Performing SSH key setup on %s as %s.', 526 self.host_port, self.user) 527 528 try: 529 host = pxssh.pxssh() 530 host.login(self.hostname, self.user, self.password, 531 port=self.port) 532 public_key = utils.get_public_key() 533 534 host.sendline('mkdir -p ~/.ssh') 535 host.prompt() 536 host.sendline('chmod 700 ~/.ssh') 537 host.prompt() 538 host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " % 539 public_key) 540 host.prompt() 541 host.sendline('chmod 600 ~/.ssh/authorized_keys') 542 host.prompt() 543 host.logout() 544 545 logging.debug('SSH key setup complete.') 546 547 except: 548 logging.debug('SSH key setup has failed.') 549 try: 550 host.logout() 551 except: 552 pass 553 554 555 def setup_ssh(self): 556 """Setup SSH""" 557 if self.password: 558 try: 559 self.ssh_ping() 560 except error.AutoservSshPingHostError: 561 self.setup_ssh_key() 562