#
# Copyright 2007 Google Inc. Released under the GPL v2

"""
This module defines the SSHHost class.

Implementation details:
You should import the "hosts" package instead of importing each type of host.

        SSHHost: a remote machine with a ssh access
"""

import inspect
import logging
import re
import time

import common
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import pxssh
from autotest_lib.server import utils
from autotest_lib.server.hosts import abstract_ssh

# In case cros_host is being ran via SSP on an older Moblab version with an
# older chromite version.
try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


class SSHHost(abstract_ssh.AbstractSSHHost):
    """
    This class represents a remote machine controlled through an ssh
    session on which you can run programs.

    It is not the machine autoserv is running on. The machine must be
    configured for password-less login, for example through public key
    authentication.

    It includes support for controlling the machine through a serial
    console on which you can run programs. If such a serial console is
    set up on the machine then capabilities such as hard reset and
    boot strap monitoring are available. If the machine does not have a
    serial console available then ordinary SSH-based commands will
    still be available, but attempts to use extensions such as
    console logging or hard reset will fail silently.

    Implementation details:
    This is a leaf class in an abstract class hierarchy, it must
    implement the unimplemented methods in parent classes.
    """

    def _initialize(self, hostname, *args, **dargs):
        """
        Construct a SSHHost object

        Args:
                hostname: network hostname or address of remote machine
        """
        super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs)
        self.setup_ssh()


    def ssh_command(self, connect_timeout=30, options='', alive_interval=300,
                    alive_count_max=3, connection_attempts=1):
        """
        Construct an ssh command with proper args for this host.

        @param connect_timeout: connection timeout (in seconds)
        @param options: SSH options
        @param alive_interval: SSH Alive interval.
        @param alive_count_max: SSH AliveCountMax.
        @param connection_attempts: SSH ConnectionAttempts

        @return the ssh command line as a string, ending with the hostname.
        """
        options = " ".join([options, self._master_ssh.ssh_option])
        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                         opts=options,
                                         hosts_file=self.known_hosts_file,
                                         connect_timeout=connect_timeout,
                                         alive_interval=alive_interval,
                                         alive_count_max=alive_count_max,
                                         connection_attempts=connection_attempts)
        return "%s %s" % (base_cmd, self.hostname)

    def _get_server_stack_state(self, lowest_frames=0, highest_frames=None):
        """ Get the server stack frame status.
        @param lowest_frames: the lowest frames to start printing.
        @param highest_frames: the highest frames to print.
                        (None means no restriction)

        @return the selected function names joined with '|', outermost
                caller first.
        """
        stack_frames = inspect.stack()
        function_names = [
                inspect.getframeinfo(frame[0]).function
                for frame in stack_frames[lowest_frames:highest_frames]]
        # Drop the frame records promptly so they do not keep frames alive.
        del stack_frames
        # inspect.stack() lists the innermost frame first; report the
        # outermost caller first instead.
        return '|'.join(reversed(function_names))

    def _verbose_logger_command(self, command):
        """
        Prepend the command for the client with information about the ssh
        command to be executed and the server stack state.

        @param command: the ssh command to be executed.

        @return the command string with the logger invocation prepended.
        """
        # The last few frames on the stack are not useful, so skip them.
        stack = self._get_server_stack_state(lowest_frames=3, highest_frames=6)
        # If logger executable exists on the DUT, use it to report the command.
        # Then regardless of logger, run the command as usual.
        command = ('test -x /usr/bin/logger && /usr/bin/logger --id=$$ '
                   '--tag=autotest "from [%s] ssh_run: %s"; %s'
                   % (stack, utils.sh_escape(command), command))
        return command


    def _run(self, command, timeout, ignore_status,
             stdout, stderr, connect_timeout, env, options, stdin, args,
             ignore_timeout, ssh_failure_retry_ok):
        """Helper function for run().

        Builds the full ssh command line, runs it through utils.run() and
        retries on DNS failures (once) and, when ssh_failure_retry_ok is
        set, on other failures (up to twice). The outcome of every ssh
        invocation and of the call as a whole is recorded in metrics
        counters.

        @return the result of utils.run(), or None if the command timed
                out and timeouts are being ignored.

        @raises AutoservSSHTimeout: ssh exited with 255 and reported a
                connection timeout.
        @raises AutoservSshPermissionDeniedError: ssh exited with 255 and
                reported "Permission denied.".
        @raises AutoservRunError: the command exited non-zero and
                ignore_status is false.
        """
        if connect_timeout > timeout:
            connect_timeout = int(timeout)
        original_cmd = command

        ssh_cmd = self.ssh_command(connect_timeout, options)
        if not env.strip():
            env = ""
        else:
            env = "export %s;" % env
        for arg in args:
            command += ' "%s"' % utils.sh_escape(arg)
        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))

        # TODO(jrbarnette): crbug.com/484726 - When we're in an SSP
        # container, sometimes shortly after reboot we will see DNS
        # resolution errors on ssh commands; the problem never
        # occurs more than once in a row. This especially affects
        # the autoupdate_Rollback test, but other cases have been
        # affected, too.
        #
        # We work around it by detecting the first DNS resolution error
        # and retrying exactly one time.
        dns_error_retry_count = 1

        def counters_inc(counter_name, failure_name):
            """Helper function to increment metrics counters.
            @param counter_name: string indicating which counter to use
            @param failure_name: string identifying an error, or 'success'
            """
            if counter_name == 'call':
                # ssh_counter records the outcome of each ssh invocation
                # inside _run(), including exceptions.
                ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls')
                fields = {'error' : failure_name or 'success',
                          'attempt' : ssh_call_count}
                ssh_counter.increment(fields=fields)

            if counter_name == 'run':
                # run_counter records each call to _run() with its result
                # and how many tries were made. Calls are recorded when
                # _run() exits (including exiting with an exception)
                run_counter = metrics.Counter('chromeos/autotest/ssh/runs')
                fields = {'error' : failure_name or 'success',
                          'attempt' : ssh_call_count}
                run_counter.increment(fields=fields)

        # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
        # error 255: if a simple retry doesn't work, kill the ssh master
        # connection and try again. (Note that either error could come from
        # the command running in the DUT, in which case the retry may be
        # useless but, in theory, also harmless.)
        if ssh_failure_retry_ok:
            # Ignore ssh command timeout, even though it could be a timeout due
            # to the command executing in the remote host. Note that passing
            # ignore_timeout = True makes utils.run() return None on timeouts
            # (and only on timeouts).
            original_ignore_timeout = ignore_timeout
            ignore_timeout = True
            ssh_failure_retry_count = 2
        else:
            ssh_failure_retry_count = 0

        ssh_call_count = 0

        while True:
            try:
                # Increment call count first, in case utils.run() throws an
                # exception.
                ssh_call_count += 1
                result = utils.run(full_cmd, timeout, True, stdout, stderr,
                                   verbose=False, stdin=stdin,
                                   stderr_is_expected=ignore_status,
                                   ignore_timeout=ignore_timeout)
            except Exception:
                # No retries on exception.
                counters_inc('call', 'exception')
                counters_inc('run', 'exception')
                # Bare raise keeps the original traceback intact.
                raise

            failure_name = None

            if result:
                if result.exit_status == 255:
                    if re.search(r'^ssh: .*: Name or service not known',
                                 result.stderr):
                        failure_name = 'dns_failure'
                    else:
                        failure_name = 'error_255'
                elif result.exit_status > 0:
                    failure_name = 'nonzero_status'
            else:
                # result == None
                failure_name = 'timeout'

            # Record the outcome of the ssh invocation.
            counters_inc('call', failure_name)

            if failure_name:
                # There was a failure: decide whether to retry.
                if failure_name == 'dns_failure':
                    if dns_error_retry_count > 0:
                        logging.debug('retrying ssh because of DNS failure')
                        dns_error_retry_count -= 1
                        continue
                else:
                    if ssh_failure_retry_count == 2:
                        logging.debug('retrying ssh command after %s',
                                      failure_name)
                        ssh_failure_retry_count -= 1
                        continue
                    elif ssh_failure_retry_count == 1:
                        # After two failures, restart the master connection
                        # before the final try.
                        logging.debug('retry 2: restarting master connection')
                        self.restart_master_ssh()
                        # Last retry: reinstate timeout behavior.
                        ignore_timeout = original_ignore_timeout
                        ssh_failure_retry_count -= 1
                        continue

            # No retry conditions occurred. Exit the loop.
            break

        # The outcomes of ssh invocations have been recorded. Now record
        # the outcome of this function.

        if ignore_timeout and not result:
            counters_inc('run', 'ignored_timeout')
            return None

        # The error messages will show up in band (indistinguishable
        # from stuff sent through the SSH connection), so we have the
        # remote computer echo the message "Connected." before running
        # any command. Since the following 2 errors have to do with
        # connecting, it's safe to do these checks.
        if result.exit_status == 255:
            if re.search(r'^ssh: connect to host .* port .*: '
                         r'Connection timed out\r$', result.stderr):
                counters_inc('run', 'final_timeout')
                raise error.AutoservSSHTimeout(
                        "ssh timed out: %s" % original_cmd.strip(), result)
            if "Permission denied." in result.stderr:
                msg = "ssh permission denied"
                counters_inc('run', 'final_eperm')
                raise error.AutoservSshPermissionDeniedError(msg, result)

        if not ignore_status and result.exit_status > 0:
            counters_inc('run', 'final_run_error')
            msg = result.stderr.strip()
            if not msg:
                msg = result.stdout.strip()
                if msg:
                    msg = msg.splitlines()[-1]
            raise error.AutoservRunError("command execution error (%d): %s" %
                                         (result.exit_status, msg), result)

        counters_inc('run', failure_name)
        return result


    def run_very_slowly(self, command, timeout=None, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=(),
            ignore_timeout=False, ssh_failure_retry_ok=False):
        """
        Run a command on the remote host.
        @note: This RPC call has an overhead of minimum 40ms and up to 400ms on
               servers (crbug.com/734887). Each time a call is added for
               every job, a server core dies in the lab.
        @see: common_lib.hosts.host.run()

        @param timeout: command execution timeout in seconds. Default is 1 hour.
        @param connect_timeout: ssh connection timeout (in seconds)
        @param options: string with additional ssh command options
        @param verbose: log the commands
        @param ignore_timeout: bool True if SSH command timeouts should be
                ignored. Will return None on command timeout.
        @param ssh_failure_retry_ok: True if the command may be retried on
                probable ssh failure (error 255 or timeout). When true,
                the command may be executed up to three times, the second
                time after restarting the ssh master connection. Use only for
                commands that are idempotent, because when a "probable
                ssh failure" occurs, we cannot tell if the command executed
                or not.

        @raises AutoservRunError: if the command failed
        @raises AutoservSSHTimeout: ssh connection has timed out
        """
        if timeout is None:
            timeout = 3600
        start_time = time.time()
        with metrics.SecondsTimer('chromeos/autotest/ssh/master_ssh_time',
                                  scale=0.001):
            if verbose:
                stack = self._get_server_stack_state(lowest_frames=1,
                                                     highest_frames=7)
                logging.debug("Running (ssh) '%s' from '%s'", command, stack)
                command = self._verbose_logger_command(command)

            self.start_master_ssh(min(
                    timeout,
                    self.DEFAULT_START_MASTER_SSH_TIMEOUT_S,
            ))

            # items() rather than iteritems() so this works on both
            # Python 2 and Python 3 dictionaries.
            env = " ".join("=".join(pair) for pair in self.env.items())
            # Time spent setting up the master connection counts against
            # the caller's overall timeout.
            elapsed = time.time() - start_time
            try:
                return self._run(command, timeout - elapsed, ignore_status,
                                 stdout_tee, stderr_tee, connect_timeout, env,
                                 options, stdin, args, ignore_timeout,
                                 ssh_failure_retry_ok)
            except error.CmdError as cmderr:
                # We get a CmdError here only if there is timeout of that
                # command. Catch that and stuff it into AutoservRunError and
                # raise it.
                timeout_message = str('Timeout encountered: %s' %
                                      cmderr.args[0])
                raise error.AutoservRunError(timeout_message, cmderr.args[1])


    run = run_very_slowly


    def run_background(self, command, verbose=True):
        """Start a command on the host in the background.

        The command is started on the host in the background, and
        this method call returns immediately without waiting for the
        command's completion. The PID of the process on the host is
        returned as a string.

        The command may redirect its stdin, stdout, or stderr as
        necessary. Without redirection, all input and output will
        use /dev/null.

        @param command The command to run in the background
        @param verbose As for `self.run()`

        @return Returns the PID of the remote background process
                as a string.
        """
        # Redirection here isn't merely hygienic; it's a functional
        # requirement. sshd won't terminate until stdin, stdout,
        # and stderr are all closed.
        #
        # The subshell is needed to do the right thing in case the
        # passed in command has its own I/O redirections.
        cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!'
        return self.run(cmd_fmt % command, verbose=verbose).stdout


    def run_short(self, command, **kwargs):
        """
        Calls the run() command with a short default timeout.

        Takes the same arguments as does run(),
        with the exception of the timeout argument which
        here is fixed at 60 seconds.
        It returns the result of run.

        @param command: the command line string

        """
        return self.run(command, timeout=60, **kwargs)


    def run_grep(self, command, timeout=30, ignore_status=False,
                 stdout_ok_regexp=None, stdout_err_regexp=None,
                 stderr_ok_regexp=None, stderr_err_regexp=None,
                 connect_timeout=30):
        """
        Run a command on the remote host and look for regexp
        in stdout or stderr to determine if the command was
        successful or not.


        @param command: the command line string
        @param timeout: time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_ok_regexp: regexp that should be in stdout
                                 if the command was successful.
        @param stdout_err_regexp: regexp that should be in stdout
                                  if the command failed.
        @param stderr_ok_regexp: regexp that should be in stderr
                                 if the command was successful.
        @param stderr_err_regexp: regexp that should be in stderr
                                  if the command failed.
        @param connect_timeout: connection timeout (in seconds)

        Returns:
                None. The method returns normally when the command is
                considered successful and raises otherwise.

        Raises:
                AutoservRunError:
                - If stderr_err_regexp is found in stderr,
                - If stdout_err_regexp is found in stdout,
                - If no "ok" regexp matched, the exit code of the
                  command was greater than 0 and ignore_status is
                  false.

        Note that a matching "ok" regexp makes the method return
        immediately, without looking at the exit code.
        """

        # We ignore the status, because we will handle it at the end.
        result = self.run(command, timeout, ignore_status=True,
                          connect_timeout=connect_timeout)

        # Look for the patterns, in order
        for (regexp, stream) in ((stderr_err_regexp, result.stderr),
                                 (stdout_err_regexp, result.stdout)):
            if regexp and stream:
                err_re = re.compile(regexp)
                if err_re.search(stream):
                    raise error.AutoservRunError(
                        '%s failed, found error pattern: "%s"' % (command,
                                                                regexp), result)

        for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
                                 (stdout_ok_regexp, result.stdout)):
            if regexp and stream:
                ok_re = re.compile(regexp)
                if ok_re.search(stream):
                    return

        if not ignore_status and result.exit_status > 0:
            msg = result.stderr.strip()
            if not msg:
                msg = result.stdout.strip()
                if msg:
                    msg = msg.splitlines()[-1]
            raise error.AutoservRunError("command execution error (%d): %s" %
                                         (result.exit_status, msg), result)


    def setup_ssh_key(self):
        """Setup SSH Key

        Log in to the host with the configured password and append the
        local public key to ~/.ssh/authorized_keys. This is best-effort:
        failures are logged at debug level and not propagated.
        """
        logging.debug('Performing SSH key setup on %s as %s.',
                      self.host_port, self.user)

        # Bound up front so the cleanup path can tell whether a session
        # object was ever created.
        host = None
        try:
            host = pxssh.pxssh()
            host.login(self.hostname, self.user, self.password,
                       port=self.port)
            public_key = utils.get_public_key()

            host.sendline('mkdir -p ~/.ssh')
            host.prompt()
            host.sendline('chmod 700 ~/.ssh')
            host.prompt()
            host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " %
                          public_key)
            host.prompt()
            host.sendline('chmod 600 ~/.ssh/authorized_keys')
            host.prompt()
            host.logout()

            logging.debug('SSH key setup complete.')

        except Exception:
            # Catch Exception rather than everything, so that
            # KeyboardInterrupt and SystemExit still propagate.
            logging.debug('SSH key setup has failed.', exc_info=True)
            if host is not None:
                try:
                    host.logout()
                except Exception:
                    # The session may never have been established or may
                    # already be closed; there is nothing more to clean up.
                    pass


    def setup_ssh(self):
        """Setup SSH

        If a password is configured and the host cannot be reached with
        an ssh ping, try to install the public key using the password.
        """
        if self.password:
            try:
                self.ssh_ping()
            except error.AutoservSshPingHostError:
                self.setup_ssh_key()