#
# Copyright 2007 Google Inc. Released under the GPL v2

"""
This module defines the SSHHost class.

Implementation details:
You should import the "hosts" package instead of importing each type of host.

        SSHHost: a remote machine with a ssh access
"""

import inspect
import logging
import re
import warnings
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import pxssh
from autotest_lib.server import utils
from autotest_lib.server.hosts import abstract_ssh

# In case cros_host is being run via SSP on an older Moblab version with an
# older chromite version.
try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


class SSHHost(abstract_ssh.AbstractSSHHost):
    """
    This class represents a remote machine controlled through an ssh
    session on which you can run programs.

    It is not the machine autoserv is running on. The machine must be
    configured for password-less login, for example through public key
    authentication.

    It includes support for controlling the machine through a serial
    console on which you can run programs. If such a serial console is
    set up on the machine then capabilities such as hard reset and
    boot strap monitoring are available. If the machine does not have a
    serial console available then ordinary SSH-based commands will
    still be available, but attempts to use extensions such as
    console logging or hard reset will fail silently.

    Implementation details:
    This is a leaf class in an abstract class hierarchy, it must
    implement the unimplemented methods in parent classes.
    """

    def _initialize(self, hostname, *args, **dargs):
        """
        Construct a SSHHost object

        Delegates to the parent initializer and then calls setup_ssh().

        Args:
                hostname: network hostname or address of remote machine
        """
        super(SSHHost, self)._initialize(hostname=hostname, *args, **dargs)
        self.setup_ssh()


    def ssh_command(self, connect_timeout=30, options='', alive_interval=300):
        """
        Construct an ssh command with proper args for this host.

        The option string of the master ssh connection is appended to
        |options| before the command is built.

        @param connect_timeout: connection timeout (in seconds)
        @param options: SSH options
        @param alive_interval: SSH Alive interval.

        @return the ssh command line, ending with this host's hostname.
        """
        options = "%s %s" % (options, self._master_ssh.ssh_option)
        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                         opts=options,
                                         hosts_file=self.known_hosts_file,
                                         connect_timeout=connect_timeout,
                                         alive_interval=alive_interval)
        return "%s %s" % (base_cmd, self.hostname)


    def _get_server_stack_state(self, lowest_frames=0, highest_frames=None):
        """ Get the server stack frame status.

        @param lowest_frames: the lowest frames to start printing.
        @param highest_frames: the highest frames to print.
                               (None means no restriction)

        @return the selected function names joined with '|', outermost
                caller first; an empty string if no frame is selected.
        """
        stack_frames = inspect.stack()
        try:
            function_names = [
                    inspect.getframeinfo(frame[0]).function
                    for frame in stack_frames[lowest_frames:highest_frames]]
        finally:
            # Drop the frame references promptly so they cannot keep
            # frames (and their locals) alive through a reference cycle.
            del stack_frames
        # inspect.stack() lists the innermost frame first; reverse so the
        # result reads in call order.
        return '|'.join(reversed(function_names))


    def _verbose_logger_command(self, command):
        """
        Prepend the command for the client with information about the ssh
        command to be executed and the server stack state.

        @param command: the ssh command to be executed.

        @return a shell snippet that reports |command| through "logger"
                (when that executable is available) and then runs it.
        """
        # The innermost 3 frames on the stack are boring; report the next
        # three (slice [3:6]).
        stack = self._get_server_stack_state(lowest_frames=3, highest_frames=6)
        # If "logger" executable exists on the DUT use it to respew |command|.
        # Then regardless of "logger" run |command| as usual.
        # NOTE(review): "-tag" looks like it was meant to be "-t" or "--tag";
        # left untouched because it is part of the command sent to the DUT.
        command = ('if type "logger" > /dev/null 2>&1; then'
                   ' logger -tag "autotest" "server[stack::%s] -> ssh_run(%s)";'
                   'fi; '
                   '%s' % (stack, utils.sh_escape(command), command))
        return command


    def _run(self, command, timeout, ignore_status,
             stdout, stderr, connect_timeout, env, options, stdin, args,
             ignore_timeout, ssh_failure_retry_ok):
        """Helper function for run().

        Builds the full ssh command line, executes it through utils.run()
        (retrying where allowed) and records the outcome in the ssh metrics
        counters.

        @param command: command line to execute on the remote host.
        @param timeout: command execution timeout, passed to utils.run().
        @param ignore_status: if False, a positive exit status raises
                AutoservRunError.
        @param stdout: passed to utils.run() as its stdout argument.
        @param stderr: passed to utils.run() as its stderr argument.
        @param connect_timeout: ssh connection timeout (in seconds).
        @param env: string of NAME=value pairs to export before |command|;
                blank means nothing is exported.
        @param options: additional ssh command options.
        @param stdin: passed to utils.run() as stdin.
        @param args: extra arguments; each one is shell-escaped, quoted and
                appended to |command|.
        @param ignore_timeout: if True, return None on command timeout
                instead of raising.
        @param ssh_failure_retry_ok: if True, retry up to twice on timeout
                or exit status 255, restarting the master ssh connection
                before the last attempt.

        @return the result object from utils.run(), or None on an ignored
                timeout.

        @raises AutoservSSHTimeout: ssh reported a connection timeout.
        @raises AutoservSshPermissionDeniedError: ssh reported
                "Permission denied.".
        @raises AutoservRunError: the exit status was positive and
                |ignore_status| is False.
        """
        ssh_cmd = self.ssh_command(connect_timeout, options)
        if not env.strip():
            env = ""
        else:
            env = "export %s;" % env
        for arg in args:
            command += ' "%s"' % utils.sh_escape(arg)
        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))

        # TODO(jrbarnette): crbug.com/484726 - When we're in an SSP
        # container, sometimes shortly after reboot we will see DNS
        # resolution errors on ssh commands; the problem never
        # occurs more than once in a row. This especially affects
        # the autoupdate_Rollback test, but other cases have been
        # affected, too.
        #
        # We work around it by detecting the first DNS resolution error
        # and retrying exactly one time.
        dns_error_retry_count = 1

        # Metric name for each counter kind understood by counters_inc():
        #   'call' records the outcome of each ssh invocation inside
        #          _run(), including exceptions.
        #   'run'  records each call to _run() with its result and how
        #          many tries were made. Calls are recorded when _run()
        #          exits (including exiting with an exception).
        counter_metrics = {
            'call': 'chromeos/autotest/ssh/calls',
            'run': 'chromeos/autotest/ssh/runs',
        }

        def counters_inc(counter_name, failure_name):
            """Helper function to increment metrics counters.
            @param counter_name: string indicating which counter to use
            @param failure_name: string identifying an error, or a false
                                 value (reported as 'success')
            """
            metric_name = counter_metrics.get(counter_name)
            if metric_name is None:
                # Unknown counter kinds are ignored.
                return
            # ssh_call_count is read from the enclosing scope at call time.
            fields = {'error': failure_name or 'success',
                      'attempt': ssh_call_count}
            metrics.Counter(metric_name).increment(fields=fields)

        # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
        # error 255: if a simple retry doesn't work, kill the ssh master
        # connection and try again. (Note that either error could come from
        # the command running in the DUT, in which case the retry may be
        # useless but, in theory, also harmless.)
        retryable_failures = ('error_255', 'timeout')
        if ssh_failure_retry_ok:
            # Ignore ssh command timeout, even though it could be a timeout due
            # to the command executing in the remote host. Note that passing
            # ignore_timeout = True makes utils.run() return None on timeouts
            # (and only on timeouts).
            original_ignore_timeout = ignore_timeout
            ignore_timeout = True
            ssh_failure_retry_count = 2
        else:
            ssh_failure_retry_count = 0

        ssh_call_count = 0

        while True:
            try:
                # Increment call count first, in case utils.run() throws an
                # exception.
                ssh_call_count += 1
                result = utils.run(full_cmd, timeout, True, stdout, stderr,
                                   verbose=False, stdin=stdin,
                                   stderr_is_expected=ignore_status,
                                   ignore_timeout=ignore_timeout)
            except Exception:
                # No retries on exception.
                counters_inc('call', 'exception')
                counters_inc('run', 'exception')
                # Bare raise keeps the original traceback intact.
                raise

            failure_name = None

            if result:
                if result.exit_status == 255:
                    if re.search(r'^ssh: .*: Name or service not known',
                                 result.stderr):
                        failure_name = 'dns_failure'
                    else:
                        failure_name = 'error_255'
                elif result.exit_status > 0:
                    failure_name = 'nonzero_status'
            else:
                # result == None
                failure_name = 'timeout'

            # Record the outcome of the ssh invocation.
            counters_inc('call', failure_name)

            # There may have been a failure: decide whether to retry.
            if failure_name == 'dns_failure':
                if dns_error_retry_count > 0:
                    logging.debug('retrying ssh because of DNS failure')
                    dns_error_retry_count -= 1
                    continue
            elif failure_name in retryable_failures:
                # Only probable ssh failures are retried; an ordinary
                # non-zero exit status is the command's own result and is
                # returned to the caller without re-running the command.
                if ssh_failure_retry_count == 2:
                    logging.debug('retrying ssh command after %s',
                                  failure_name)
                    ssh_failure_retry_count -= 1
                    continue
                elif ssh_failure_retry_count == 1:
                    # After two failures, restart the master connection
                    # before the final try.
                    logging.debug('retry 2: restarting master connection')
                    self.restart_master_ssh()
                    # Last retry: reinstate timeout behavior.
                    ignore_timeout = original_ignore_timeout
                    ssh_failure_retry_count -= 1
                    continue

            # No retry conditions occurred. Exit the loop.
            break

        # The outcomes of ssh invocations have been recorded. Now record
        # the outcome of this function.

        if ignore_timeout and not result:
            counters_inc('run', 'ignored_timeout')
            return None

        # ssh's own error messages arrive on the same stderr stream as the
        # remote command's output. Only look for the two connection-level
        # messages below when the exit status is 255.
        if result.exit_status == 255:
            if re.search(r'^ssh: connect to host .* port .*: '
                         r'Connection timed out\r$', result.stderr):
                counters_inc('run', 'final_timeout')
                raise error.AutoservSSHTimeout("ssh timed out", result)
            if "Permission denied." in result.stderr:
                msg = "ssh permission denied"
                counters_inc('run', 'final_eperm')
                raise error.AutoservSshPermissionDeniedError(msg, result)

        if not ignore_status and result.exit_status > 0:
            counters_inc('run', 'final_run_error')
            raise error.AutoservRunError("command execution error", result)

        counters_inc('run', failure_name)
        return result


    @metrics.SecondsTimerDecorator(
            'chromeos/autotest/ssh/master_ssh_time')
    def run_very_slowly(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30, options='', stdin=None, verbose=True, args=(),
            ignore_timeout=False, ssh_failure_retry_ok=False):
        """
        Run a command on the remote host.
        This RPC call has an overhead of minimum 40ms and up to 400ms on
        servers (crbug.com/734887). Each time a run_very_slowly is added for
        every job - a server core dies in the lab.
        @see common_lib.hosts.host.run()

        @param command: the command line string
        @param timeout: command execution timeout
        @param ignore_status: do not raise an exception on a non-zero exit
                status of the command
        @param stdout_tee: where the command's stdout is sent
        @param stderr_tee: where the command's stderr is sent
        @param connect_timeout: ssh connection timeout (in seconds)
        @param options: string with additional ssh command options
        @param stdin: stdin to pass to the executed command
        @param verbose: log the commands
        @param args: sequence of extra arguments; each is shell-escaped and
                appended to the command
        @param ignore_timeout: bool True if SSH command timeouts should be
                ignored. Will return None on command timeout.
        @param ssh_failure_retry_ok: True if the command may be retried on
                probable ssh failure (error 255 or timeout). When true,
                the command may be executed up to three times, the last
                time after restarting the ssh master connection. Use only
                for commands that are idempotent, because when a "probable
                ssh failure" occurs, we cannot tell if the command executed
                or not.

        @raises AutoservRunError: if the command failed
        @raises AutoservSSHTimeout: ssh connection has timed out
        """
        if verbose:
            stack = self._get_server_stack_state(lowest_frames=1,
                                                 highest_frames=7)
            logging.debug("Running (ssh) '%s' from '%s'", command, stack)
            command = self._verbose_logger_command(command)

        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        # items() rather than iteritems() so this also works on Python 3.
        env = " ".join("=".join(pair) for pair in self.env.items())
        try:
            return self._run(command, timeout, ignore_status,
                             stdout_tee, stderr_tee, connect_timeout, env,
                             options, stdin, args, ignore_timeout,
                             ssh_failure_retry_ok)
        except error.CmdError as cmderr:
            # We get a CmdError here only if there is timeout of that command.
            # Catch that and stuff it into AutoservRunError and raise it.
            timeout_message = 'Timeout encountered: %s' % cmderr.args[0]
            raise error.AutoservRunError(timeout_message, cmderr.args[1])


    def run(self, *args, **kwargs):
        """Run a command on the remote host.

        Thin alias that forwards every argument to run_very_slowly().
        """
        return self.run_very_slowly(*args, **kwargs)


    def run_background(self, command, verbose=True):
        """Start a command on the host in the background.

        The command is started on the host in the background, and
        this method call returns immediately without waiting for the
        command's completion. The PID of the process on the host is
        returned as a string.

        The command may redirect its stdin, stdout, or stderr as
        necessary. Without redirection, all input and output will
        use /dev/null.

        @param command The command to run in the background
        @param verbose As for `self.run()`

        @return Returns the PID of the remote background process
                as a string.
        """
        # Redirection here isn't merely hygienic; it's a functional
        # requirement. sshd won't terminate until stdin, stdout,
        # and stderr are all closed.
        #
        # The subshell is needed to do the right thing in case the
        # passed in command has its own I/O redirections.
        cmd_fmt = '( %s ) </dev/null >/dev/null 2>&1 & echo -n $!'
        return self.run(cmd_fmt % command, verbose=verbose).stdout


    def run_short(self, command, **kwargs):
        """
        Calls the run() command with a short default timeout.

        Takes the same arguments as does run(),
        with the exception of the timeout argument which
        here is fixed at 60 seconds.
        It returns the result of run.

        @param command: the command line string

        """
        return self.run(command, timeout=60, **kwargs)


    def run_grep(self, command, timeout=30, ignore_status=False,
                 stdout_ok_regexp=None, stdout_err_regexp=None,
                 stderr_ok_regexp=None, stderr_err_regexp=None,
                 connect_timeout=30):
        """
        Run a command on the remote host and look for regexp
        in stdout or stderr to determine if the command was
        successful or not.

        The checks are applied in this order: error patterns (stderr,
        then stdout), ok patterns (stderr, then stdout), exit status.
        A pattern is only checked when both it and its stream are
        non-empty.

        @param command: the command line string
        @param timeout: time limit in seconds before attempting to
                        kill the running process. The run() function
                        will take a few seconds longer than 'timeout'
                        to complete if it has to kill the process.
        @param ignore_status: do not raise an exception, no matter
                              what the exit code of the command is.
        @param stdout_ok_regexp: regexp that should be in stdout
                                 if the command was successful.
        @param stdout_err_regexp: regexp that should be in stdout
                                  if the command failed.
        @param stderr_ok_regexp: regexp that should be in stderr
                                 if the command was successful.
        @param stderr_err_regexp: regexp that should be in stderr
                                  if the command failed.
        @param connect_timeout: connection timeout (in seconds)

        Returns:
                None if the command was successful, raises an exception
                otherwise.

        Raises:
                AutoservRunError:
                - If stderr_err_regexp is found in stderr,
                - If stdout_err_regexp is found in stdout,
                - If no ok pattern matched, the exit code of the
                  command execution was not 0 and ignore_status is
                  False.
                An ok pattern that matches makes the call succeed
                regardless of the exit code; an ok pattern that does
                not match is not an error by itself.
        """

        # We ignore the status, because we will handle it at the end.
        result = self.run(command, timeout, ignore_status=True,
                          connect_timeout=connect_timeout)

        # Look for the patterns, in order
        for (regexp, stream) in ((stderr_err_regexp, result.stderr),
                                 (stdout_err_regexp, result.stdout)):
            if regexp and stream and re.search(regexp, stream):
                raise error.AutoservRunError(
                        '%s failed, found error pattern: "%s"' % (command,
                                                                regexp), result)

        for (regexp, stream) in ((stderr_ok_regexp, result.stderr),
                                 (stdout_ok_regexp, result.stdout)):
            if regexp and stream and re.search(regexp, stream):
                return

        if not ignore_status and result.exit_status > 0:
            raise error.AutoservRunError("command execution error", result)


    def setup_ssh_key(self):
        """Setup SSH Key

        Logs in to the host with the configured password through pxssh
        and appends the public key from utils.get_public_key() to
        ~/.ssh/authorized_keys. This is best effort: any failure is logged
        at debug level and not propagated.
        """
        logging.debug('Performing SSH key setup on %s:%d as %s.',
                      self.hostname, self.port, self.user)

        # Bound up front so the error path can tell whether a session
        # object was ever created.
        host = None
        try:
            host = pxssh.pxssh()
            host.login(self.hostname, self.user, self.password,
                       port=self.port)
            public_key = utils.get_public_key()

            host.sendline('mkdir -p ~/.ssh')
            host.prompt()
            host.sendline('chmod 700 ~/.ssh')
            host.prompt()
            host.sendline("echo '%s' >> ~/.ssh/authorized_keys; " %
                          public_key)
            host.prompt()
            host.sendline('chmod 600 ~/.ssh/authorized_keys')
            host.prompt()
            host.logout()

            logging.debug('SSH key setup complete.')

        except Exception:
            # Deliberately best effort, but keep the cause in the debug log
            # and let KeyboardInterrupt/SystemExit propagate.
            logging.debug('SSH key setup has failed.', exc_info=True)
            if host is not None:
                try:
                    host.logout()
                except Exception:
                    # The session may never have been established or may
                    # already be closed; nothing more to clean up.
                    logging.debug('Ignoring error while logging out after '
                                  'failed SSH key setup.', exc_info=True)


    def setup_ssh(self):
        """Setup SSH

        When a password is configured, check ssh access with ssh_ping()
        and fall back to installing the public key if the ping fails.
        """
        if self.password:
            try:
                self.ssh_ping()
            except error.AutoservSshPingHostError:
                self.setup_ssh_key()