# Copyright (c) 2008 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Generic ssh-based host implementation (file transfer, liveness checks)."""

import glob
import logging
import os
import re
import shlex
import shutil
import socket
import subprocess
import tempfile
import time

from autotest_lib.client.bin.result_tools import runner as result_tools_runner
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.client.common_lib.global_config import global_config
from autotest_lib.server import utils, autotest
from autotest_lib.server.hosts import host_info
from autotest_lib.server.hosts import remote
from autotest_lib.server.hosts import rpc_server_tracker
from autotest_lib.server.hosts import ssh_multiplex

# pylint: disable=C0111

get_value = global_config.get_config_value
enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh', type=bool,
                              default=False)

# Number of seconds to use the cached up status.
_DEFAULT_UP_STATUS_EXPIRATION_SECONDS = 300
_DEFAULT_SSH_PORT = 22

# Number of seconds to wait for the host to shut down in wait_down().
_DEFAULT_WAIT_DOWN_TIME_SECONDS = 120

# Number of seconds to wait for the host to boot up in wait_up().
_DEFAULT_WAIT_UP_TIME_SECONDS = 120

# Timeout in seconds for a single call of get_boot_id() in wait_down()
# and a single ssh ping in wait_up().
_DEFAULT_MAX_PING_TIMEOUT = 10


class AbstractSSHHost(remote.RemoteHost):
    """
    This class represents a generic implementation of most of the
    framework necessary for controlling a host via ssh. It implements
    almost all of the abstract Host methods, except for the core
    Host.run method.
    """
    VERSION_PREFIX = ''
    # Timeout for master ssh connection setup, in seconds.
    DEFAULT_START_MASTER_SSH_TIMEOUT_S = 5

    def _initialize(self, hostname, user="root", port=_DEFAULT_SSH_PORT,
                    password="", is_client_install_supported=True,
                    afe_host=None, host_info_store=None, connection_pool=None,
                    *args, **dargs):
        """
        Set up the ssh-related state of the host object.

        @param hostname: The hostname of the host.
        @param user: The username to use when ssh'ing into the host.
        @param password: The password to use when ssh'ing into the host.
        @param port: The port to use for ssh.
        @param is_client_install_supported: Boolean to indicate if we can
                install autotest on the host.
        @param afe_host: The host object attained from the AFE (get_hosts).
        @param host_info_store: Optional host_info.CachingHostInfoStore object
                to obtain / update host information.
        @param connection_pool: ssh_multiplex.ConnectionPool instance to share
                the master ssh connection across control scripts.
        """
        super(AbstractSSHHost, self)._initialize(hostname=hostname,
                                                 *args, **dargs)
        # IP address is retrieved only on demand. Otherwise the host
        # initialization will fail for host is not online.
        self._ip = None
        self.user = user
        self.port = port
        self.password = password
        self._is_client_install_supported = is_client_install_supported
        self._use_rsync = None
        # mkstemp() hands back an *open* descriptor together with the path;
        # only the path is needed, so close the descriptor right away
        # instead of leaking it for the lifetime of the process.
        known_hosts_fd, self.known_hosts_file = tempfile.mkstemp()
        os.close(known_hosts_fd)
        self._rpc_server_tracker = rpc_server_tracker.RpcServerTracker(self)

        # Master SSH connection background job, socket temp directory and
        # socket control path option. If master-SSH is enabled, these fields
        # will be initialized by start_master_ssh when a new SSH connection
        # is initiated.
        self._connection_pool = connection_pool
        if connection_pool:
            self._master_ssh = connection_pool.get(hostname, user, port)
        else:
            self._master_ssh = ssh_multiplex.MasterSsh(hostname, user, port)

        self._afe_host = afe_host or utils.EmptyAFEHost()
        self.host_info_store = (host_info_store or
                                host_info.InMemoryHostInfoStore())

        # The cached status of whether the DUT responded to ping.
        self._cached_up_status = None
        # The timestamp when the value of _cached_up_status is set.
        self._cached_up_status_updated = None


    @property
    def ip(self):
        """@return IP address of the host (resolved lazily and cached).
        """
        if not self._ip:
            self._ip = socket.getaddrinfo(self.hostname, None)[0][4][0]
        return self._ip


    @property
    def is_client_install_supported(self):
        """
        Returns True if the host supports autotest client installs, False
        otherwise.
        """
        return self._is_client_install_supported


    @property
    def rpc_server_tracker(self):
        """
        @return The RPC server tracker associated with this host.
        """
        return self._rpc_server_tracker


    @property
    def is_default_port(self):
        """Returns True if its port is default SSH port."""
        return self.port == _DEFAULT_SSH_PORT


    @property
    def host_port(self):
        """Returns hostname if port is default. Otherwise, hostname:port.
        """
        if self.is_default_port:
            return self.hostname
        else:
            return '%s:%d' % (self.hostname, self.port)


    # Though it doesn't use self here, it is not declared as staticmethod
    # because its subclass may use self to access member variables.
    def make_ssh_command(self, user="root", port=_DEFAULT_SSH_PORT, opts='',
                         hosts_file='/dev/null', connect_timeout=30,
                         alive_interval=300, alive_count_max=3,
                         connection_attempts=1):
        """Compose the ssh command prefix (binary, options, user and port).

        @param user: Login name passed to ssh via -l.
        @param port: Port passed to ssh via -p.
        @param opts: Extra option string placed before the -o options.
        @param hosts_file: Value for UserKnownHostsFile.
        @param connect_timeout: Value for ConnectTimeout.
        @param alive_interval: Value for ServerAliveInterval.
        @param alive_count_max: Value for ServerAliveCountMax.
        @param connection_attempts: Value for ConnectionAttempts.

        @return The command string, without the target host appended.
        """
        ssh_options = " ".join([
            opts,
            self.make_ssh_options(
                hosts_file=hosts_file, connect_timeout=connect_timeout,
                alive_interval=alive_interval, alive_count_max=alive_count_max,
                connection_attempts=connection_attempts)])
        return "/usr/bin/ssh -a -x %s -l %s -p %d" % (ssh_options, user, port)


    @staticmethod
    def make_ssh_options(hosts_file='/dev/null', connect_timeout=30,
                         alive_interval=300, alive_count_max=3,
                         connection_attempts=1):
        """Composes SSH -o options.

        @return A single string of space separated "-o key=value" options.
        """
        # `long` only exists on Python 2, which is what this file targets.
        assert isinstance(connect_timeout, (int, long))
        assert connect_timeout > 0  # can't disable the timeout

        options = [("StrictHostKeyChecking", "no"),
                   ("UserKnownHostsFile", hosts_file),
                   ("BatchMode", "yes"),
                   ("ConnectTimeout", str(connect_timeout)),
                   ("ServerAliveInterval", str(alive_interval)),
                   ("ServerAliveCountMax", str(alive_count_max)),
                   ("ConnectionAttempts", str(connection_attempts))]
        return " ".join("-o %s=%s" % kv for kv in options)


    def use_rsync(self):
        """Return whether rsync may be used, probing the host only once."""
        if self._use_rsync is not None:
            return self._use_rsync

        # Check if rsync is available on the remote host. If it's not,
        # don't try to use it for any future file transfers.
        self._use_rsync = self.check_rsync()
        if not self._use_rsync:
            logging.warning("rsync not available on remote host %s -- disabled",
                            self.host_port)
        return self._use_rsync


    def check_rsync(self):
        """
        Check if rsync is available on the remote host.

        @return True if "rsync --version" ran without AutoservRunError.
        """
        try:
            self.run("rsync --version", stdout_tee=None, stderr_tee=None)
        except error.AutoservRunError:
            return False
        return True


    def _encode_remote_paths(self, paths, escape=True, use_scp=False):
        """
        Given a list of file paths, encodes it as a single remote path, in
        the style used by rsync and scp.
        escape: add \\ to protect special characters.
        use_scp: encode for scp if true, rsync if false.
        """
        if escape:
            paths = [utils.scp_remote_escape(path) for path in paths]

        # Named remote_host (not `remote`) so the imported `remote` module
        # is not shadowed inside this method.
        remote_host = self.hostname

        # rsync and scp require IPv6 brackets, even when there isn't any
        # trailing port number (ssh doesn't support IPv6 brackets).
        # In the Python >= 3.3 future, 'import ipaddress' will parse addresses.
        if re.search(r':.*:', remote_host):
            remote_host = '[%s]' % remote_host

        if use_scp:
            return '%s@%s:"%s"' % (self.user, remote_host, " ".join(paths))
        else:
            return '%s@%s:%s' % (
                    self.user, remote_host,
                    " :".join('"%s"' % p for p in paths))


    def _encode_local_paths(self, paths, escape=True):
        """
        Given a list of file paths, encodes it as a single local path.
        escape: add \\ to protect special characters.
        """
        if escape:
            paths = [utils.sh_escape(path) for path in paths]

        return " ".join('"%s"' % p for p in paths)


    def rsync_options(self, delete_dest=False, preserve_symlinks=False,
                      safe_symlinks=False, excludes=None):
        """Obtains rsync options for the remote.

        @param delete_dest: Add --delete when true.
        @param preserve_symlinks: Use -l instead of -L when true.
        @param safe_symlinks: Use "-l --safe-links" when true; takes
                precedence over preserve_symlinks.
        @param excludes: Optional list of patterns, each passed as --exclude.

        @return The option string, including the --rsh ssh command.
        """
        ssh_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                        opts=self._master_ssh.ssh_option,
                                        hosts_file=self.known_hosts_file)
        if delete_dest:
            delete_flag = "--delete"
        else:
            delete_flag = ""
        if safe_symlinks:
            symlink_flag = "-l --safe-links"
        elif preserve_symlinks:
            symlink_flag = "-l"
        else:
            symlink_flag = "-L"
        exclude_args = ''
        if excludes:
            exclude_args = ' '.join(
                    ["--exclude '%s'" % exclude for exclude in excludes])
        return "%s %s --timeout=1800 --rsh='%s' -az --no-o --no-g %s" % (
                symlink_flag, delete_flag, ssh_cmd, exclude_args)


    def _make_rsync_cmd(self, sources, dest, delete_dest,
                        preserve_symlinks, safe_symlinks, excludes=None):
        """
        Given a string of source paths and a destination path, produces the
        appropriate rsync command for copying them. Remote paths must be
        pre-encoded.
        """
        rsync_options = self.rsync_options(
                delete_dest=delete_dest, preserve_symlinks=preserve_symlinks,
                safe_symlinks=safe_symlinks, excludes=excludes)
        return 'rsync %s %s "%s"' % (rsync_options, sources, dest)


    def _make_ssh_cmd(self, cmd):
        """
        Create a base ssh command string for the host which can be used
        to run commands directly on the machine
        """
        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                         opts=self._master_ssh.ssh_option,
                                         hosts_file=self.known_hosts_file)

        return '%s %s "%s"' % (base_cmd, self.hostname, utils.sh_escape(cmd))


    def _make_scp_cmd(self, sources, dest):
        """
        Given a string of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded.
        """
        command = ("scp -rq %s -o StrictHostKeyChecking=no "
                   "-o UserKnownHostsFile=%s -P %d %s '%s'")
        return command % (self._master_ssh.ssh_option, self.known_hosts_file,
                          self.port, sources, dest)


    def _make_rsync_compatible_globs(self, path, is_local):
        """
        Given an rsync-style path, returns a list of globbed paths
        that will hopefully provide equivalent behaviour for scp. Does not
        support the full range of rsync pattern matching behaviour, only that
        exposed in the get/send_file interface (trailing slashes).

        The is_local param is flag indicating if the paths should be
        interpreted as local or remote paths.
        """

        # non-trailing slash paths should just work
        if len(path) == 0 or path[-1] != "/":
            return [path]

        # make a function to test if a pattern matches any files
        if is_local:
            def glob_matches_files(path, pattern):
                return len(glob.glob(path + pattern)) > 0
        else:
            def glob_matches_files(path, pattern):
                result = self.run("ls \"%s\"%s" % (utils.sh_escape(path),
                                                   pattern),
                                  stdout_tee=None, ignore_status=True)
                return result.exit_status == 0

        # take a set of globs that cover all files, and see which are needed
        patterns = ["*", ".[!.]*"]
        patterns = [p for p in patterns if glob_matches_files(path, p)]

        # convert them into a set of paths suitable for the commandline
        if is_local:
            return ["\"%s\"%s" % (utils.sh_escape(path), pattern)
                    for pattern in patterns]
        else:
            return [utils.scp_remote_escape(path) + pattern
                    for pattern in patterns]


    def _make_rsync_compatible_source(self, source, is_local):
        """
        Applies the same logic as _make_rsync_compatible_globs, but
        applies it to an entire list of sources, producing a new list of
        sources, properly quoted.
        """
        # Flat comprehension instead of sum(lists, []), which re-copies the
        # accumulated list for every element.
        return [globbed
                for path in source
                for globbed in self._make_rsync_compatible_globs(path,
                                                                 is_local)]


    def _set_umask_perms(self, dest):
        """
        Given a destination file/dir (recursively) set the permissions on
        all the files and directories to the max allowed by running umask.
        """

        # now this looks strange but I haven't found a way in Python to _just_
        # get the umask, apparently the only option is to try to set it
        umask = os.umask(0)
        os.umask(umask)

        max_privs = 0o777 & ~umask

        def set_file_privs(filename):
            """Sets mode of |filename|.  Assumes |filename| exists."""
            file_stat = os.stat(filename)

            file_privs = max_privs
            # if the original file permissions do not have at least one
            # executable bit then do not set it anywhere
            if not file_stat.st_mode & 0o111:
                file_privs &= ~0o111

            os.chmod(filename, file_privs)

        # try a bottom-up walk so changes on directory permissions won't cut
        # our access to the files/directories inside it
        for root, dirs, files in os.walk(dest, topdown=False):
            # when setting the privileges we emulate the chmod "X" behaviour
            # that sets to execute only if it is a directory or any of the
            # owner/group/other already has execute right
            for dirname in dirs:
                os.chmod(os.path.join(root, dirname), max_privs)

            # Filter out broken symlinks as we go. os.walk() yields bare
            # file names, so existence has to be tested on the path joined
            # with |root|, not on the name relative to the cwd.
            for filename in files:
                file_path = os.path.join(root, filename)
                if os.path.exists(file_path):
                    set_file_privs(file_path)

        # now set privs for the dest itself
        if os.path.isdir(dest):
            os.chmod(dest, max_privs)
        else:
            set_file_privs(dest)


    def get_file(self, source, dest, delete_dest=False, preserve_perm=True,
                 preserve_symlinks=False, retry=True, safe_symlinks=False,
                 try_rsync=True):
        """
        Copy files from the remote host to a local path.

        Directories will be copied recursively.
        If a source component is a directory with a trailing slash,
        the content of the directory will be copied, otherwise, the
        directory itself and its content will be copied. This
        behavior is similar to that of the program 'rsync'.

        Args:
                source: either
                        1) a single file or directory, as a string
                        2) a list of one or more (possibly mixed)
                                files or directories
                dest: a file or a directory (if source contains a
                        directory or more than one element, you must
                        supply a directory dest)
                delete_dest: if this is true, the command will also clear
                             out any old files at dest that are not in the
                             source
                preserve_perm: tells get_file() to try to preserve the sources
                               permissions on files and dirs
                preserve_symlinks: try to preserve symlinks instead of
                                   transforming them into files/dirs on copy
                retry: if true, a failed rsync is attempted once more for
                       exit codes that may be caused by transient problems
                safe_symlinks: same as preserve_symlinks, but discard links
                               that may point outside the copied tree
                try_rsync: set to False to skip directly to using scp
        Raises:
                AutoservRunError: the scp command failed
        """
        logging.debug('get_file. source: %s, dest: %s, delete_dest: %s,'
                      'preserve_perm: %s, preserve_symlinks:%s', source, dest,
                      delete_dest, preserve_perm, preserve_symlinks)

        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        if isinstance(source, basestring):
            source = [source]
        dest = os.path.abspath(dest)

        # If rsync is disabled or fails, try scp.
        try_scp = True
        if try_rsync and self.use_rsync():
            logging.debug('Using Rsync.')
            try:
                remote_source = self._encode_remote_paths(source)
                local_dest = utils.sh_escape(dest)
                rsync = self._make_rsync_cmd(remote_source, local_dest,
                                             delete_dest, preserve_symlinks,
                                             safe_symlinks)
                utils.run(rsync)
                try_scp = False
            except error.CmdError as e:
                # retry on rsync exit values which may be caused by transient
                # network problems:
                #
                # rc 10: Error in socket I/O
                # rc 12: Error in rsync protocol data stream
                # rc 23: Partial transfer due to error
                # rc 255: Ssh error
                #
                # Note that rc 23 includes dangling symlinks.  In this case
                # retrying is useless, but not very damaging since rsync checks
                # for those before starting the transfer (scp does not).
                status = e.result_obj.exit_status
                if status in [10, 12, 23, 255] and retry:
                    logging.warning('rsync status %d, retrying', status)
                    # Forward safe_symlinks too, so the retry uses the same
                    # symlink handling as the first attempt.
                    self.get_file(source, dest, delete_dest, preserve_perm,
                                  preserve_symlinks, retry=False,
                                  safe_symlinks=safe_symlinks)
                    # The nested get_file() does all that's needed.
                    return
                else:
                    logging.warning("trying scp, rsync failed: %s (%d)",
                                    e, status)

        if try_scp:
            logging.debug('Trying scp.')
            # scp has no equivalent to --delete, just drop the entire dest dir
            if delete_dest and os.path.isdir(dest):
                shutil.rmtree(dest)
                os.mkdir(dest)

            remote_source = self._make_rsync_compatible_source(source, False)
            if remote_source:
                # _make_rsync_compatible_source() already did the escaping
                remote_source = self._encode_remote_paths(
                        remote_source, escape=False, use_scp=True)
                local_dest = utils.sh_escape(dest)
                scp = self._make_scp_cmd(remote_source, local_dest)
                try:
                    utils.run(scp)
                except error.CmdError as e:
                    logging.debug('scp failed: %s', e)
                    raise error.AutoservRunError(e.args[0], e.args[1])

        if not preserve_perm:
            # we have no way to tell scp to not try to preserve the
            # permissions so set them after copy instead.
            # for rsync we could use "--no-p --chmod=ugo=rwX" but those
            # options are only in very recent rsync versions
            self._set_umask_perms(dest)


    def send_file(self, source, dest, delete_dest=False,
                  preserve_symlinks=False, excludes=None):
        """
        Copy files from a local path to the remote host.

        Directories will be copied recursively.
        If a source component is a directory with a trailing slash,
        the content of the directory will be copied, otherwise, the
        directory itself and its content will be copied. This
        behavior is similar to that of the program 'rsync'.

        Args:
                source: either
                        1) a single file or directory, as a string
                        2) a list of one or more (possibly mixed)
                                files or directories
                dest: a file or a directory (if source contains a
                        directory or more than one element, you must
                        supply a directory dest)
                delete_dest: if this is true, the command will also clear
                             out any old files at dest that are not in the
                             source
                preserve_symlinks: controls if symlinks on the source will be
                    copied as such on the destination or transformed into the
                    referenced file/directory
                excludes: A list of file pattern that matches files not to be
                          sent. `send_file` will fail if exclude is set, since
                          local copy does not support --exclude, e.g., when
                          using scp to copy file.

        Raises:
                AutoservRunError: the scp command failed
        """
        logging.debug('send_file. source: %s, dest: %s, delete_dest: %s,'
                      'preserve_symlinks:%s', source, dest,
                      delete_dest, preserve_symlinks)
        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        if isinstance(source, basestring):
            source = [source]

        local_sources = self._encode_local_paths(source)
        if not local_sources:
            raise error.TestError('source |%s| yielded an empty string' % (
                source))
        if local_sources.find('\x00') != -1:
            raise error.TestError('one or more sources include NUL char')

        # If rsync is disabled or fails, try scp.
        try_scp = True
        if self.use_rsync():
            logging.debug('Using Rsync.')
            remote_dest = self._encode_remote_paths([dest])
            try:
                rsync = self._make_rsync_cmd(local_sources, remote_dest,
                                             delete_dest, preserve_symlinks,
                                             False, excludes=excludes)
                utils.run(rsync)
                try_scp = False
            except error.CmdError as e:
                logging.warning("trying scp, rsync failed: %s", e)

        if try_scp:
            logging.debug('Trying scp.')
            if excludes:
                raise error.AutotestHostRunError(
                        '--exclude is not supported in scp, try to use rsync. '
                        'excludes: %s' % ','.join(excludes), None)
            # scp has no equivalent to --delete, just drop the entire dest dir
            if delete_dest:
                # NOTE(review): dest is interpolated into the remote shell
                # command unescaped here and below -- confirm callers never
                # pass paths containing spaces or shell metacharacters.
                is_dir = self.run("ls -d %s/" % dest,
                                  ignore_status=True).exit_status == 0
                if is_dir:
                    cmd = "rm -rf %s && mkdir %s"
                    cmd %= (dest, dest)
                    self.run(cmd)

            remote_dest = self._encode_remote_paths([dest], use_scp=True)
            local_sources = self._make_rsync_compatible_source(source, True)
            if local_sources:
                sources = self._encode_local_paths(local_sources, escape=False)
                scp = self._make_scp_cmd(sources, remote_dest)
                try:
                    utils.run(scp)
                except error.CmdError as e:
                    logging.debug('scp failed: %s', e)
                    raise error.AutoservRunError(e.args[0], e.args[1])
            else:
                logging.debug('skipping scp for empty source list')


    def verify_ssh_user_access(self):
        """Verify ssh access to this host.

        @returns False if ssh_ping fails due to Permissions error, True
                 otherwise.
        """
        try:
            self.ssh_ping()
        except (error.AutoservSshPermissionDeniedError,
                error.AutoservSshPingHostError):
            return False
        return True


    def ssh_ping(self, timeout=60, connect_timeout=None, base_cmd='true'):
        """
        Pings remote host via ssh.

        @param timeout: Command execution timeout in seconds.
                        Defaults to 60 seconds.
        @param connect_timeout: ssh connection timeout in seconds.
        @param base_cmd: The base command to run with the ssh ping.
                         Defaults to true.
        @raise AutoservSSHTimeout: If the ssh ping times out.
        @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                                 permissions.
        @raise AutoservSshPingHostError: For other AutoservRunErrors.
        """
        # The connect timeout never exceeds the overall command timeout.
        ctimeout = min(timeout, connect_timeout or timeout)
        try:
            self.run(base_cmd, timeout=timeout, connect_timeout=ctimeout,
                     ssh_failure_retry_ok=True)
        except error.AutoservSSHTimeout:
            msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
            raise error.AutoservSSHTimeout(msg)
        except error.AutoservSshPermissionDeniedError:
            # let AutoservSshPermissionDeniedError be visible to the callers
            raise
        except error.AutoservRunError as e:
            # convert the generic AutoservRunError into something more
            # specific for this context
            raise error.AutoservSshPingHostError(e.description + '\n' +
                                                 repr(e.result_obj))


    def is_up(self, timeout=60, connect_timeout=None, base_cmd='true'):
        """
        Check if the remote host is up by ssh-ing and running a base command.

        @param timeout: command execution timeout in seconds.
        @param connect_timeout: ssh connection timeout in seconds.
        @param base_cmd: a base command to run with ssh. The default is 'true'.
        @returns True if the remote host is up before the timeout expires,
                 False otherwise.
        """
        try:
            self.ssh_ping(timeout=timeout,
                          connect_timeout=connect_timeout,
                          base_cmd=base_cmd)
        except error.AutoservError:
            return False
        else:
            return True


    def is_up_fast(self):
        """Return True if the host can be pinged."""
        ping_config = ping_runner.PingConfig(
                self.hostname, count=3, ignore_result=True, ignore_status=True)
        return ping_runner.PingRunner().ping(ping_config).received > 0


    def wait_up(self, timeout=_DEFAULT_WAIT_UP_TIME_SECONDS):
        """
        Wait until the remote host is up or the timeout expires.

        In fact, it will wait until an ssh connection to the remote
        host can be established, and getty is running.

        @param timeout time limit in seconds before returning even
            if the host is not up.

        @returns True if the host was found to be up before the timeout expires,
                 False otherwise
        """
        current_time = int(time.time())
        end_time = current_time + timeout

        autoserv_error_logged = False
        while current_time < end_time:
            ping_timeout = min(_DEFAULT_MAX_PING_TIMEOUT,
                               end_time - current_time)
            if self.is_up(timeout=ping_timeout, connect_timeout=ping_timeout):
                try:
                    if self.are_wait_up_processes_up():
                        logging.debug('Host %s is now up', self.host_port)
                        return True
                except error.AutoservError as e:
                    # Log only the first such failure to avoid log spam.
                    if not autoserv_error_logged:
                        logging.debug('Ignoring failure to reach %s: %s %s',
                                      self.host_port, e,
                                      '(and further similar failures)')
                        autoserv_error_logged = True
            time.sleep(1)
            current_time = int(time.time())

        logging.debug('Host %s is still down after waiting %d seconds',
                      self.host_port, int(timeout + time.time() - end_time))
        return False


    def wait_down(self, timeout=_DEFAULT_WAIT_DOWN_TIME_SECONDS,
                  warning_timer=None, old_boot_id=None,
                  max_ping_timeout=_DEFAULT_MAX_PING_TIMEOUT):
        """
        Wait until the remote host is down or the timeout expires.

        If old_boot_id is provided, waits until either the machine is
        unpingable or self.get_boot_id() returns a value different from
        old_boot_id. If the boot_id value has changed then the function
        returns True under the assumption that the machine has shut down
        and has now already come back up.

        If old_boot_id is None then until the machine becomes unreachable the
        method assumes the machine has not yet shut down.

        @param timeout Time limit in seconds before returning even if the host
            is still up.
        @param warning_timer Time limit in seconds that will generate a warning
            if the host is not down yet. Can be None for no warning.
        @param old_boot_id A string containing the result of self.get_boot_id()
            prior to the host being told to shut down. Can be None if this is
            not available.
        @param max_ping_timeout Maximum timeout in seconds for each
            self.get_boot_id() call. If this timeout is hit, it is assumed that
            the host went down and became unreachable.

        @returns True if the host was found to be down (max_ping_timeout timeout
            expired or boot_id changed if provided) and False if timeout
            expired.
        """
        #TODO: there is currently no way to distinguish between knowing
        #TODO: boot_id was unsupported and not knowing the boot_id.
        current_time = int(time.time())
        end_time = current_time + timeout

        if warning_timer:
            warn_time = current_time + warning_timer

        if old_boot_id is not None:
            logging.debug('Host %s pre-shutdown boot_id is %s',
                          self.host_port, old_boot_id)

        # Impose semi real-time deadline constraints, since some clients
        # (eg: watchdog timer tests) expect strict checking of time elapsed.
        # Each iteration of this loop is treated as though it atomically
        # completes within current_time, this is needed because if we used
        # inline time.time() calls instead then the following could happen:
        #
        # while time.time() < end_time:                     [23 < 30]
        #    some code.                                     [takes 10 secs]
        #    try:
        #        new_boot_id = self.get_boot_id(timeout=end_time - time.time())
        #                                                   [30 - 33]
        # The last step will lead to a return True, when in fact the machine
        # went down at 32 seconds (>30). Hence we need to pass get_boot_id
        # the same time that allowed us into that iteration of the loop.
        while current_time < end_time:
            ping_timeout = min(end_time - current_time, max_ping_timeout)
            try:
                new_boot_id = self.get_boot_id(timeout=ping_timeout)
            except error.AutoservError:
                logging.debug('Host %s is now unreachable over ssh, is down',
                              self.host_port)
                return True
            else:
                # if the machine is up but the boot_id value has changed from
                # old boot id, then we can assume the machine has gone down
                # and then already come back up
                if old_boot_id is not None and old_boot_id != new_boot_id:
                    logging.debug('Host %s now has boot_id %s and so must '
                                  'have rebooted', self.host_port, new_boot_id)
                    return True

            if warning_timer and current_time > warn_time:
                self.record("INFO", None, "shutdown",
                            "Shutdown took longer than %ds" % warning_timer)
                # Print the warning only once.
                warning_timer = None
                # If a machine is stuck switching runlevels
                # This may cause the machine to reboot.
                self.run('kill -HUP 1', ignore_status=True)

            time.sleep(1)
            current_time = int(time.time())

        return False


    # tunable constants for the verify & repair code
    AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER",
                                               "gb_diskspace_required",
                                               type=float,
                                               default=20.0)


    def verify_connectivity(self):
        """Check that the host answers over ssh and is not shutting down.

        @raise AutoservHostIsShuttingDownError: If is_shutting_down() is true.
        """
        super(AbstractSSHHost, self).verify_connectivity()

        logging.info('Pinging host %s', self.host_port)
        self.ssh_ping()
        logging.info("Host (ssh) %s is alive", self.host_port)

        if self.is_shutting_down():
            raise error.AutoservHostIsShuttingDownError("Host is shutting down")


    def verify_software(self):
        """Check free disk space in the autotest install directory.

        Only AutoservDiskFullHostError is propagated; other host errors and
        a missing autodir are logged and ignored.
        """
        super(AbstractSSHHost, self).verify_software()
        try:
            self.check_diskspace(autotest.Autotest.get_install_dir(self),
                                 self.AUTOTEST_GB_DISKSPACE_REQUIRED)
        except error.AutoservDiskFullHostError:
            # only want to raise if it's a space issue
            raise
        except (error.AutoservHostError, autotest.AutodirNotFoundError):
            logging.exception('autodir space check exception, this is probably '
                              'safe to ignore\n')


    def close(self):
        """Release ssh resources held by this host object."""
        super(AbstractSSHHost, self).close()
        self.rpc_server_tracker.disconnect_all()
        # A master connection obtained from a shared pool is not closed here.
        if not self._connection_pool:
            self._master_ssh.close()
        if os.path.exists(self.known_hosts_file):
            os.remove(self.known_hosts_file)


    def restart_master_ssh(self):
        """
        Stop and restart the ssh master connection.  This is meant as a last
        resort when ssh commands fail and we don't understand why.
        """
        logging.debug('Restarting master ssh connection')
        self._master_ssh.close()
        self._master_ssh.maybe_start(timeout=30)


    def start_master_ssh(self, timeout=DEFAULT_START_MASTER_SSH_TIMEOUT_S):
        """
        Called whenever a slave SSH connection needs to be initiated (e.g., by
        run, rsync, scp). If master SSH support is enabled and a master SSH
        connection is not active already, start a new one in the background.
        Also, cleanup any zombie master SSH connections (e.g., dead due to
        reboot).

        timeout: timeout in seconds (default 5) to wait for master ssh
                 connection to be established. If timeout is reached, a
                 warning message is logged, but no other action is taken.
        """
        if not enable_master_ssh:
            return
        self._master_ssh.maybe_start(timeout=timeout)


    def clear_known_hosts(self):
        """Clears out the temporary ssh known_hosts file.

        This is useful if the test SSHes to the machine, then reinstalls it,
        then SSHes to it again.  It can be called after the reinstall to
        reduce the spam in the logs.
        """
        logging.info("Clearing known hosts for host '%s', file '%s'.",
                     self.host_port, self.known_hosts_file)
        # Clear out the file by opening it for writing and then closing.
        with open(self.known_hosts_file, "w"):
            pass


    def collect_logs(self, remote_src_dir, local_dest_dir, ignore_errors=True):
        """Copy log directories from a host to a local directory.

        @param remote_src_dir: A source directory on the host.
        @param local_dest_dir: A path to a local destination directory.
            If it doesn't exist it will be created.
        @param ignore_errors: If True, ignore exceptions.

        @raises OSError: If there were problems creating the local_dest_dir and
            ignore_errors is False.
        @raises AutoservRunError, AutotestRunError: If something goes wrong
            while copying the directories and ignore_errors is False.
        """
        if not self.check_cached_up_status():
            logging.warning('Host %s did not answer to ping, skip collecting '
                            'logs.', self.host_port)
            return

        locally_created_dest = False
        if (not os.path.exists(local_dest_dir)
                or not os.path.isdir(local_dest_dir)):
            try:
                os.makedirs(local_dest_dir)
                locally_created_dest = True
            except OSError as e:
                logging.warning('Unable to collect logs from host '
                                '%s: %s', self.host_port, e)
                if not ignore_errors:
                    raise
                return

        # Build test result directory summary
        try:
            result_tools_runner.run_on_client(self, remote_src_dir)
        except (error.AutotestRunError, error.AutoservRunError,
                error.AutoservSSHTimeout):
            logging.exception(
                    'Non-critical failure: Failed to collect and throttle '
                    'results at %s from host %s', remote_src_dir,
                    self.host_port)

        try:
            self.get_file(remote_src_dir, local_dest_dir, safe_symlinks=True)
        except (error.AutotestRunError, error.AutoservRunError,
                error.AutoservSSHTimeout) as e:
            logging.warning('Collection of %s to local dir %s from host %s '
                            'failed: %s', remote_src_dir, local_dest_dir,
                            self.host_port, e)
            # Only remove the destination if this call created it.
            if locally_created_dest:
                shutil.rmtree(local_dest_dir, ignore_errors=ignore_errors)
            if not ignore_errors:
                raise

        # Clean up directory summary file on the client side.
        try:
            result_tools_runner.run_on_client(self, remote_src_dir,
                                              cleanup_only=True)
        except (error.AutotestRunError, error.AutoservRunError,
                error.AutoservSSHTimeout):
            logging.exception(
                    'Non-critical failure: Failed to cleanup result summary '
                    'files at %s in host %s', remote_src_dir, self.hostname)


    def create_ssh_tunnel(self, port, local_port):
        """Create an ssh tunnel from local_port to port.

        This is used to forward a port securely through a tunnel process from
        the server to the DUT for RPC server connection.

        @param port: remote port on the host.
        @param local_port: local forwarding port.

        @return: the tunnel process.
        """
        tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
        # NOTE(review): no user is passed, so make_ssh_command's default
        # ("root") applies rather than self.user -- confirm this is intended.
        ssh_cmd = self.make_ssh_command(opts=tunnel_options, port=self.port)
        tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
        logging.debug('Full tunnel command: %s', tunnel_cmd)
        # Exec the ssh process directly here rather than using a shell.
        # Using a shell leaves a dangling ssh process, because we deliver
        # signals to the shell wrapping ssh, not the ssh process itself.
        args = shlex.split(tunnel_cmd)
        with open('/dev/null', 'w') as devnull:
            tunnel_proc = subprocess.Popen(args, stdout=devnull, stderr=devnull,
                                           close_fds=True)
        logging.debug('Started ssh tunnel, local = %d'
                      ' remote = %d, pid = %d',
                      local_port, port, tunnel_proc.pid)
        return tunnel_proc


    def disconnect_ssh_tunnel(self, tunnel_proc):
        """
        Disconnects a previously forwarded port from the server to the DUT for
        RPC server connection.

        @param tunnel_proc: a tunnel process returned from |create_ssh_tunnel|.
        """
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
            logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
        else:
            logging.debug('Tunnel pid %d terminated early, status %d',
                          tunnel_proc.pid, tunnel_proc.returncode)


    def get_os_type(self):
        """Returns the host OS descriptor (to be implemented in subclasses).

        @return A string describing the OS type.
        """
        raise NotImplementedError


    def check_cached_up_status(
            self, expiration_seconds=_DEFAULT_UP_STATUS_EXPIRATION_SECONDS):
        """Check if the DUT responded to ping in the past `expiration_seconds`.

        @param expiration_seconds: The number of seconds to keep the cached
                status of whether the DUT responded to ping.
        @return: True if the DUT has responded to ping during the past
                 `expiration_seconds`.
        """
        # Refresh the up status if any of following conditions is true:
        # * cached status is never set
        # * cached status is False, so the method can check if the host is up
        #   again.
        # * If the cached status is older than `expiration_seconds`
        expire_time = time.time() - expiration_seconds
        if (self._cached_up_status_updated is None or
                not self._cached_up_status or
                self._cached_up_status_updated < expire_time):
            self._cached_up_status = self.is_up_fast()
            self._cached_up_status_updated = time.time()
        return self._cached_up_status