import os
import time
import socket
import shutil
import glob
import logging
import tempfile
import re
import shlex
import subprocess

from autotest_lib.client.bin.result_tools import runner as result_tools_runner
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.client.common_lib.global_config import global_config
from autotest_lib.server import utils, autotest
from autotest_lib.server.hosts import host_info
from autotest_lib.server.hosts import remote
from autotest_lib.server.hosts import rpc_server_tracker
from autotest_lib.server.hosts import ssh_multiplex

# pylint: disable=C0111

get_value = global_config.get_config_value
enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh', type=bool,
                              default=False)

# Number of seconds to use the cached up status.
_DEFAULT_UP_STATUS_EXPIRATION_SECONDS = 300
_DEFAULT_SSH_PORT = 22

# Number of seconds to wait for the host to shut down in wait_down().
_DEFAULT_WAIT_DOWN_TIME_SECONDS = 120

# Timeout in seconds for a single call of get_boot_id() in wait_down().
_DEFAULT_MAX_PING_TIMEOUT = 10


class AbstractSSHHost(remote.RemoteHost):
    """
    This class represents a generic implementation of most of the
    framework necessary for controlling a host via ssh. It implements
    almost all of the abstract Host methods, except for the core
    Host.run method.
    """
    VERSION_PREFIX = ''

    def _initialize(self, hostname, user="root", port=_DEFAULT_SSH_PORT,
                    password="", is_client_install_supported=True,
                    afe_host=None, host_info_store=None, connection_pool=None,
                    *args, **dargs):
        """
        @param hostname: The hostname of the host.
        @param user: The username to use when ssh'ing into the host.
        @param password: The password to use when ssh'ing into the host.
        @param port: The port to use for ssh.
        @param is_client_install_supported: Boolean to indicate if we can
            install autotest on the host.
        @param afe_host: The host object attained from the AFE (get_hosts).
        @param host_info_store: Optional host_info.CachingHostInfoStore object
            to obtain / update host information.
        @param connection_pool: ssh_multiplex.ConnectionPool instance to share
            the master ssh connection across control scripts.
        """
        super(AbstractSSHHost, self)._initialize(hostname=hostname,
                                                 *args, **dargs)
        # IP address is retrieved only on demand. Otherwise the host
        # initialization will fail for host is not online.
        self._ip = None
        self.user = user
        self.port = port
        self.password = password
        self._is_client_install_supported = is_client_install_supported
        self._use_rsync = None
        self.known_hosts_file = tempfile.mkstemp()[1]
        self._rpc_server_tracker = rpc_server_tracker.RpcServerTracker(self)

        # Master SSH connection background job, socket temp directory and
        # socket control path option. If master-SSH is enabled, these fields
        # will be initialized by start_master_ssh when a new SSH connection
        # is initiated.
        self._connection_pool = connection_pool
        if connection_pool:
            self._master_ssh = connection_pool.get(hostname, user, port)
        else:
            self._master_ssh = ssh_multiplex.MasterSsh(hostname, user, port)

        self._afe_host = afe_host or utils.EmptyAFEHost()
        self.host_info_store = (host_info_store or
                                host_info.InMemoryHostInfoStore())

        # The cached status of whether the DUT responded to ping.
        self._cached_up_status = None
        # The timestamp when the value of _cached_up_status is set.
        self._cached_up_status_updated = None


    @property
    def ip(self):
        """@return IP address of the host.
        """
        if not self._ip:
            self._ip = socket.getaddrinfo(self.hostname, None)[0][4][0]
        return self._ip


    @property
    def is_client_install_supported(self):
        """
        Returns True if the host supports autotest client installs, False
        otherwise.
        """
        return self._is_client_install_supported


    @property
    def rpc_server_tracker(self):
        """
        @return The RPC server tracker associated with this host.
        """
        return self._rpc_server_tracker


    @property
    def is_default_port(self):
        """Returns True if its port is default SSH port."""
        return self.port == _DEFAULT_SSH_PORT


    @property
    def host_port(self):
        """Returns hostname if port is default. Otherwise, hostname:port.
        """
        if self.is_default_port:
            return self.hostname
        else:
            return '%s:%d' % (self.hostname, self.port)


    # Though it doesn't use self here, it is not declared as staticmethod
    # because its subclass may use self to access member variables.
    def make_ssh_command(self, user="root", port=_DEFAULT_SSH_PORT, opts='',
                         hosts_file='/dev/null', connect_timeout=30,
                         alive_interval=300, alive_count_max=3,
                         connection_attempts=1):
        """Composes the base ssh command used to reach this host."""
        ssh_options = " ".join([
                opts,
                self.make_ssh_options(
                        hosts_file=hosts_file, connect_timeout=connect_timeout,
                        alive_interval=alive_interval,
                        alive_count_max=alive_count_max,
                        connection_attempts=connection_attempts)])
        return "/usr/bin/ssh -a -x %s -l %s -p %d" % (ssh_options, user, port)


    @staticmethod
    def make_ssh_options(hosts_file='/dev/null', connect_timeout=30,
                         alive_interval=300, alive_count_max=3,
                         connection_attempts=1):
        """Composes SSH -o options."""
        assert isinstance(connect_timeout, (int, long))
        assert connect_timeout > 0  # can't disable the timeout

        options = [("StrictHostKeyChecking", "no"),
                   ("UserKnownHostsFile", hosts_file),
                   ("BatchMode", "yes"),
                   ("ConnectTimeout", str(connect_timeout)),
                   ("ServerAliveInterval", str(alive_interval)),
                   ("ServerAliveCountMax", str(alive_count_max)),
                   ("ConnectionAttempts", str(connection_attempts))]
        return " ".join("-o %s=%s" % kv for kv in options)


    def use_rsync(self):
        """Returns whether rsync should be used for transfers (cached)."""
        if self._use_rsync is not None:
            return self._use_rsync

        # Check if rsync is available on the remote host. If it's not,
        # don't try to use it for any future file transfers.
        self._use_rsync = self.check_rsync()
        if not self._use_rsync:
            logging.warning("rsync not available on remote host %s -- disabled",
                            self.host_port)
        return self._use_rsync


    def check_rsync(self):
        """
        Check if rsync is available on the remote host.
        """
        try:
            self.run("rsync --version", stdout_tee=None, stderr_tee=None)
        except error.AutoservRunError:
            return False
        return True


    def _encode_remote_paths(self, paths, escape=True, use_scp=False):
        """
        Given a list of file paths, encodes it as a single remote path, in
        the style used by rsync and scp.
        escape: add \\ to protect special characters.
        use_scp: encode for scp if true, rsync if false.
        """
        if escape:
            paths = [utils.scp_remote_escape(path) for path in paths]

        remote = self.hostname

        # rsync and scp require IPv6 brackets, even when there isn't any
        # trailing port number (ssh doesn't support IPv6 brackets).
        # In the Python >= 3.3 future, 'import ipaddress' will parse addresses.
        if re.search(r':.*:', remote):
            remote = '[%s]' % remote

        if use_scp:
            return '%s@%s:"%s"' % (self.user, remote, " ".join(paths))
        else:
            return '%s@%s:%s' % (
                    self.user, remote,
                    " :".join('"%s"' % p for p in paths))


    def _encode_local_paths(self, paths, escape=True):
        """
        Given a list of file paths, encodes it as a single local path.
        escape: add \\ to protect special characters.
        """
        if escape:
            paths = [utils.sh_escape(path) for path in paths]

        return " ".join('"%s"' % p for p in paths)


    def rsync_options(self, delete_dest=False, preserve_symlinks=False,
                      safe_symlinks=False, excludes=None):
        """Obtains rsync options for the remote."""
        ssh_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                        opts=self._master_ssh.ssh_option,
                                        hosts_file=self.known_hosts_file)
        if delete_dest:
            delete_flag = "--delete"
        else:
            delete_flag = ""
        if safe_symlinks:
            symlink_flag = "-l --safe-links"
        elif preserve_symlinks:
            symlink_flag = "-l"
        else:
            symlink_flag = "-L"
        exclude_args = ''
        if excludes:
            exclude_args = ' '.join(
                    ["--exclude '%s'" % exclude for exclude in excludes])
        return "%s %s --timeout=1800 --rsh='%s' -az --no-o --no-g %s" % (
                symlink_flag, delete_flag, ssh_cmd, exclude_args)


    def _make_rsync_cmd(self, sources, dest, delete_dest,
                        preserve_symlinks, safe_symlinks, excludes=None):
        """
        Given a string of source paths and a destination path, produces the
        appropriate rsync command for copying them. Remote paths must be
        pre-encoded.
        """
        rsync_options = self.rsync_options(
                delete_dest=delete_dest, preserve_symlinks=preserve_symlinks,
                safe_symlinks=safe_symlinks, excludes=excludes)
        return 'rsync %s %s "%s"' % (rsync_options, sources, dest)


    def _make_ssh_cmd(self, cmd):
        """
        Create a base ssh command string for the host which can be used
        to run commands directly on the machine
        """
        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
                                         opts=self._master_ssh.ssh_option,
                                         hosts_file=self.known_hosts_file)

        return '%s %s "%s"' % (base_cmd, self.hostname, utils.sh_escape(cmd))


    def _make_scp_cmd(self, sources, dest):
        """
        Given a string of source paths and a destination path, produces the
        appropriate scp command for encoding it. Remote paths must be
        pre-encoded.
        """
        command = ("scp -rq %s -o StrictHostKeyChecking=no "
                   "-o UserKnownHostsFile=%s -P %d %s '%s'")
        return command % (self._master_ssh.ssh_option, self.known_hosts_file,
                          self.port, sources, dest)


    def _make_rsync_compatible_globs(self, path, is_local):
        """
        Given an rsync-style path, returns a list of globbed paths
        that will hopefully provide equivalent behaviour for scp. Does not
        support the full range of rsync pattern matching behaviour, only that
        exposed in the get/send_file interface (trailing slashes).

        The is_local param is flag indicating if the paths should be
        interpreted as local or remote paths.
        """

        # non-trailing slash paths should just work
        if len(path) == 0 or path[-1] != "/":
            return [path]

        # make a function to test if a pattern matches any files
        if is_local:
            def glob_matches_files(path, pattern):
                return len(glob.glob(path + pattern)) > 0
        else:
            def glob_matches_files(path, pattern):
                result = self.run("ls \"%s\"%s" % (utils.sh_escape(path),
                                                   pattern),
                                  stdout_tee=None, ignore_status=True)
                return result.exit_status == 0

        # take a set of globs that cover all files, and see which are needed
        patterns = ["*", ".[!.]*"]
        patterns = [p for p in patterns if glob_matches_files(path, p)]

        # convert them into a set of paths suitable for the commandline
        if is_local:
            return ["\"%s\"%s" % (utils.sh_escape(path), pattern)
                    for pattern in patterns]
        else:
            return [utils.scp_remote_escape(path) + pattern
                    for pattern in patterns]


    def _make_rsync_compatible_source(self, source, is_local):
        """
        Applies the same logic as _make_rsync_compatible_globs, but
        applies it to an entire list of sources, producing a new list of
        sources, properly quoted.
        """
        return sum((self._make_rsync_compatible_globs(path, is_local)
                    for path in source), [])


    def _set_umask_perms(self, dest):
        """
        Given a destination file/dir (recursively) set the permissions on
        all the files and directories to the max allowed by running umask.
        """

        # now this looks strange but I haven't found a way in Python to _just_
        # get the umask, apparently the only option is to try to set it
        umask = os.umask(0)
        os.umask(umask)

        max_privs = 0o777 & ~umask

        def set_file_privs(filename):
            """Sets mode of |filename|. Assumes |filename| exists."""
            file_stat = os.stat(filename)

            file_privs = max_privs
            # if the original file permissions do not have at least one
            # executable bit then do not set it anywhere
            if not file_stat.st_mode & 0o111:
                file_privs &= ~0o111

            os.chmod(filename, file_privs)

        # try a bottom-up walk so changes on directory permissions won't cut
        # our access to the files/directories inside it
        for root, dirs, files in os.walk(dest, topdown=False):
            # when setting the privileges we emulate the chmod "X" behaviour
            # that sets to execute only if it is a directory or any of the
            # owner/group/other already has execute right
            for dirname in dirs:
                os.chmod(os.path.join(root, dirname), max_privs)

            # Filter out broken symlinks as we go.
            for filename in filter(os.path.exists, files):
                set_file_privs(os.path.join(root, filename))


        # now set privs for the dest itself
        if os.path.isdir(dest):
            os.chmod(dest, max_privs)
        else:
            set_file_privs(dest)


    def get_file(self, source, dest, delete_dest=False, preserve_perm=True,
                 preserve_symlinks=False, retry=True, safe_symlinks=False):
        """
        Copy files from the remote host to a local path.

        Directories will be copied recursively.
        If a source component is a directory with a trailing slash,
        the content of the directory will be copied, otherwise, the
        directory itself and its content will be copied. This
        behavior is similar to that of the program 'rsync'.

        Args:
            source: either
                1) a single file or directory, as a string
                2) a list of one or more (possibly mixed)
                   files or directories
            dest: a file or a directory (if source contains a
                  directory or more than one element, you must
                  supply a directory dest)
            delete_dest: if this is true, the command will also clear
                         out any old files at dest that are not in the
                         source
            preserve_perm: tells get_file() to try to preserve the sources
                           permissions on files and dirs
            preserve_symlinks: try to preserve symlinks instead of
                               transforming them into files/dirs on copy
            safe_symlinks: same as preserve_symlinks, but discard links
                           that may point outside the copied tree
        Raises:
            AutoservRunError: the scp command failed
        """
        logging.debug('get_file. source: %s, dest: %s, delete_dest: %s,'
                      'preserve_perm: %s, preserve_symlinks:%s', source, dest,
                      delete_dest, preserve_perm, preserve_symlinks)

        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        if isinstance(source, basestring):
            source = [source]
        dest = os.path.abspath(dest)

        # If rsync is disabled or fails, try scp.
        try_scp = True
        if self.use_rsync():
            logging.debug('Using Rsync.')
            try:
                remote_source = self._encode_remote_paths(source)
                local_dest = utils.sh_escape(dest)
                rsync = self._make_rsync_cmd(remote_source, local_dest,
                                             delete_dest, preserve_symlinks,
                                             safe_symlinks)
                utils.run(rsync)
                try_scp = False
            except error.CmdError as e:
                # retry on rsync exit values which may be caused by transient
                # network problems:
                #
                # rc 10: Error in socket I/O
                # rc 12: Error in rsync protocol data stream
                # rc 23: Partial transfer due to error
                # rc 255: Ssh error
                #
                # Note that rc 23 includes dangling symlinks.  In this case
                # retrying is useless, but not very damaging since rsync checks
                # for those before starting the transfer (scp does not).
                status = e.result_obj.exit_status
                if status in [10, 12, 23, 255] and retry:
                    logging.warning('rsync status %d, retrying', status)
                    # Propagate safe_symlinks too, so the retry behaves
                    # identically to the original attempt.
                    self.get_file(source, dest, delete_dest, preserve_perm,
                                  preserve_symlinks, retry=False,
                                  safe_symlinks=safe_symlinks)
                    # The nested get_file() does all that's needed.
                    return
                else:
                    logging.warning("trying scp, rsync failed: %s (%d)",
                                    e, status)

        if try_scp:
            logging.debug('Trying scp.')
            # scp has no equivalent to --delete, just drop the entire dest dir
            if delete_dest and os.path.isdir(dest):
                shutil.rmtree(dest)
                os.mkdir(dest)

            remote_source = self._make_rsync_compatible_source(source, False)
            if remote_source:
                # _make_rsync_compatible_source() already did the escaping
                remote_source = self._encode_remote_paths(
                        remote_source, escape=False, use_scp=True)
                local_dest = utils.sh_escape(dest)
                scp = self._make_scp_cmd(remote_source, local_dest)
                try:
                    utils.run(scp)
                except error.CmdError as e:
                    logging.debug('scp failed: %s', e)
                    raise error.AutoservRunError(e.args[0], e.args[1])

        if not preserve_perm:
            # we have no way to tell scp to not try to preserve the
            # permissions so set them after copy instead.
            # for rsync we could use "--no-p --chmod=ugo=rwX" but those
            # options are only in very recent rsync versions
            self._set_umask_perms(dest)


    def send_file(self, source, dest, delete_dest=False,
                  preserve_symlinks=False, excludes=None):
        """
        Copy files from a local path to the remote host.

        Directories will be copied recursively.
        If a source component is a directory with a trailing slash,
        the content of the directory will be copied, otherwise, the
        directory itself and its content will be copied. This
        behavior is similar to that of the program 'rsync'.

        Args:
            source: either
                1) a single file or directory, as a string
                2) a list of one or more (possibly mixed)
                   files or directories
            dest: a file or a directory (if source contains a
                  directory or more than one element, you must
                  supply a directory dest)
            delete_dest: if this is true, the command will also clear
                         out any old files at dest that are not in the
                         source
            preserve_symlinks: controls if symlinks on the source will be
                copied as such on the destination or transformed into the
                referenced file/directory
            excludes: A list of file pattern that matches files not to be
                      sent. `send_file` will fail if exclude is set, since
                      local copy does not support --exclude, e.g., when
                      using scp to copy file.

        Raises:
            AutoservRunError: the scp command failed
        """
        logging.debug('send_file. source: %s, dest: %s, delete_dest: %s,'
                      'preserve_symlinks:%s', source, dest,
                      delete_dest, preserve_symlinks)
        # Start a master SSH connection if necessary.
        self.start_master_ssh()

        if isinstance(source, basestring):
            source = [source]

        local_sources = self._encode_local_paths(source)
        if not local_sources:
            raise error.TestError('source |%s| yielded an empty string' % (
                source))
        if local_sources.find('\x00') != -1:
            raise error.TestError('one or more sources include NUL char')

        # If rsync is disabled or fails, try scp.
        try_scp = True
        if self.use_rsync():
            logging.debug('Using Rsync.')
            remote_dest = self._encode_remote_paths([dest])
            try:
                rsync = self._make_rsync_cmd(local_sources, remote_dest,
                                             delete_dest, preserve_symlinks,
                                             False, excludes=excludes)
                utils.run(rsync)
                try_scp = False
            except error.CmdError as e:
                logging.warning("trying scp, rsync failed: %s", e)

        if try_scp:
            logging.debug('Trying scp.')
            if excludes:
                raise error.AutotestHostRunError(
                        '--exclude is not supported in scp, try to use rsync. '
                        'excludes: %s' % ','.join(excludes), None)
            # scp has no equivalent to --delete, just drop the entire dest dir
            if delete_dest:
                is_dir = self.run("ls -d %s/" % dest,
                                  ignore_status=True).exit_status == 0
                if is_dir:
                    cmd = "rm -rf %s && mkdir %s"
                    cmd %= (dest, dest)
                    self.run(cmd)

            remote_dest = self._encode_remote_paths([dest], use_scp=True)
            local_sources = self._make_rsync_compatible_source(source, True)
            if local_sources:
                sources = self._encode_local_paths(local_sources, escape=False)
                scp = self._make_scp_cmd(sources, remote_dest)
                try:
                    utils.run(scp)
                except error.CmdError as e:
                    logging.debug('scp failed: %s', e)
                    raise error.AutoservRunError(e.args[0], e.args[1])
            else:
                logging.debug('skipping scp for empty source list')


    def verify_ssh_user_access(self):
        """Verify ssh access to this host.

        @returns False if ssh_ping fails due to Permissions error, True
                 otherwise.
        """
        try:
            self.ssh_ping()
        except (error.AutoservSshPermissionDeniedError,
                error.AutoservSshPingHostError):
            return False
        return True


    def ssh_ping(self, timeout=60, connect_timeout=None, base_cmd='true'):
        """
        Pings remote host via ssh.

        @param timeout: Time in seconds before giving up.
                        Defaults to 60 seconds.
        @param connect_timeout: Connection timeout in seconds; capped at
                                `timeout`.
        @param base_cmd: The base command to run with the ssh ping.
                         Defaults to true.
        @raise AutoservSSHTimeout: If the ssh ping times out.
        @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                                 permissions.
        @raise AutoservSshPingHostError: For other AutoservRunErrors.
        """
        ctimeout = min(timeout, connect_timeout or timeout)
        try:
            self.run(base_cmd, timeout=timeout, connect_timeout=ctimeout,
                     ssh_failure_retry_ok=True)
        except error.AutoservSSHTimeout:
            msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
            raise error.AutoservSSHTimeout(msg)
        except error.AutoservSshPermissionDeniedError:
            # let AutoservSshPermissionDeniedError be visible to the callers
            raise
        except error.AutoservRunError as e:
            # convert the generic AutoservRunError into something more
            # specific for this context
            raise error.AutoservSshPingHostError(e.description + '\n' +
                                                 repr(e.result_obj))


    def is_up(self, timeout=60, connect_timeout=None, base_cmd='true'):
        """
        Check if the remote host is up by ssh-ing and running a base command.

        @param timeout: timeout in seconds.
        @param connect_timeout: ssh connection timeout in seconds.
        @param base_cmd: a base command to run with ssh. The default is 'true'.
        @returns True if the remote host is up before the timeout expires,
                 False otherwise.
        """
        try:
            self.ssh_ping(timeout=timeout,
                          connect_timeout=connect_timeout,
                          base_cmd=base_cmd)
        except error.AutoservError:
            return False
        else:
            return True


    def is_up_fast(self):
        """Return True if the host can be pinged."""
        ping_config = ping_runner.PingConfig(
                self.hostname, count=3, ignore_result=True, ignore_status=True)
        return ping_runner.PingRunner().ping(ping_config).received > 0


    def wait_up(self, timeout=None):
        """
        Wait until the remote host is up or the timeout expires.

        In fact, it will wait until an ssh connection to the remote
        host can be established, and getty is running.

        @param timeout time limit in seconds before returning even
            if the host is not up.  None waits indefinitely.

        @returns True if the host was found to be up before the timeout
            expires, False otherwise
        """
        current_time = int(time.time())
        if timeout:
            end_time = current_time + timeout

        autoserv_error_logged = False
        while not timeout or current_time < end_time:
            # Without a deadline, bound each attempt so a wedged ssh
            # connection cannot stall this loop forever.
            ssh_timeout = (end_time - current_time) if timeout else 60
            if self.is_up(timeout=ssh_timeout, connect_timeout=20):
                try:
                    if self.are_wait_up_processes_up():
                        logging.debug('Host %s is now up', self.host_port)
                        return True
                except error.AutoservError as e:
                    if not autoserv_error_logged:
                        logging.debug('Ignoring failure to reach %s: %s %s',
                                      self.host_port, e,
                                      '(and further similar failures)')
                        autoserv_error_logged = True
            time.sleep(1)
            current_time = int(time.time())

        logging.debug('Host %s is still down after waiting %d seconds',
                      self.host_port, int(timeout + time.time() - end_time))
        return False


    def wait_down(self, timeout=_DEFAULT_WAIT_DOWN_TIME_SECONDS,
                  warning_timer=None, old_boot_id=None,
                  max_ping_timeout=_DEFAULT_MAX_PING_TIMEOUT):
        """
        Wait until the remote host is down or the timeout expires.

        If old_boot_id is provided, waits until either the machine is
        unpingable or self.get_boot_id() returns a value different from
        old_boot_id. If the boot_id value has changed then the function
        returns True under the assumption that the machine has shut down
        and has now already come back up.

        If old_boot_id is None then until the machine becomes unreachable the
        method assumes the machine has not yet shut down.

        @param timeout Time limit in seconds before returning even if the host
            is still up.
        @param warning_timer Time limit in seconds that will generate a warning
            if the host is not down yet. Can be None for no warning.
        @param old_boot_id A string containing the result of self.get_boot_id()
            prior to the host being told to shut down. Can be None if this is
            not available.
        @param max_ping_timeout Maximum timeout in seconds for each
            self.get_boot_id() call. If this timeout is hit, it is assumed that
            the host went down and became unreachable.

        @returns True if the host was found to be down (max_ping_timeout
            timeout expired or boot_id changed if provided) and False if
            timeout expired.
        """
        # TODO: there is currently no way to distinguish between knowing
        # TODO: boot_id was unsupported and not knowing the boot_id.
        current_time = int(time.time())
        end_time = current_time + timeout

        if warning_timer:
            warn_time = current_time + warning_timer

        if old_boot_id is not None:
            logging.debug('Host %s pre-shutdown boot_id is %s',
                          self.host_port, old_boot_id)

        # Impose semi real-time deadline constraints, since some clients
        # (eg: watchdog timer tests) expect strict checking of time elapsed.
        # Each iteration of this loop is treated as though it atomically
        # completes within current_time, this is needed because if we used
        # inline time.time() calls instead then the following could happen:
        #
        # while time.time() < end_time:                     [23 < 30]
        #    some code.                                     [takes 10 secs]
        #    try:
        #        new_boot_id = self.get_boot_id(timeout=end_time - time.time())
        #                                                   [30 - 33]
        # The last step will lead to a return True, when in fact the machine
        # went down at 32 seconds (>30). Hence we need to pass get_boot_id
        # the same time that allowed us into that iteration of the loop.
        while current_time < end_time:
            ping_timeout = min(end_time - current_time, max_ping_timeout)
            try:
                new_boot_id = self.get_boot_id(timeout=ping_timeout)
            except error.AutoservError:
                logging.debug('Host %s is now unreachable over ssh, is down',
                              self.host_port)
                return True
            else:
                # if the machine is up but the boot_id value has changed from
                # old boot id, then we can assume the machine has gone down
                # and then already come back up
                if old_boot_id is not None and old_boot_id != new_boot_id:
                    logging.debug('Host %s now has boot_id %s and so must '
                                  'have rebooted', self.host_port, new_boot_id)
                    return True

            if warning_timer and current_time > warn_time:
                self.record("INFO", None, "shutdown",
                            "Shutdown took longer than %ds" % warning_timer)
                # Print the warning only once.
                warning_timer = None
                # If a machine is stuck switching runlevels
                # This may cause the machine to reboot.
                self.run('kill -HUP 1', ignore_status=True)

            time.sleep(1)
            current_time = int(time.time())

        return False


    # tunable constants for the verify & repair code
    AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER",
                                               "gb_diskspace_required",
                                               type=float,
                                               default=20.0)


    def verify_connectivity(self):
        """Verify the host is reachable over ssh and not shutting down."""
        super(AbstractSSHHost, self).verify_connectivity()

        logging.info('Pinging host ' + self.host_port)
        self.ssh_ping()
        logging.info("Host (ssh) %s is alive", self.host_port)

        if self.is_shutting_down():
            raise error.AutoservHostIsShuttingDownError("Host is shutting down")


    def verify_software(self):
        """Verify the autotest install directory has enough disk space."""
        super(AbstractSSHHost, self).verify_software()
        try:
            self.check_diskspace(autotest.Autotest.get_install_dir(self),
                                 self.AUTOTEST_GB_DISKSPACE_REQUIRED)
        except error.AutoservDiskFullHostError:
            # only want to raise if it's a space issue
            raise
        except (error.AutoservHostError, autotest.AutodirNotFoundError):
            logging.exception('autodir space check exception, this is probably '
                              'safe to ignore\n')


    def close(self):
        """Release host resources: RPC servers, master ssh, known_hosts."""
        super(AbstractSSHHost, self).close()
        self.rpc_server_tracker.disconnect_all()
        if not self._connection_pool:
            self._master_ssh.close()
        if os.path.exists(self.known_hosts_file):
            os.remove(self.known_hosts_file)


    def restart_master_ssh(self):
        """
        Stop and restart the ssh master connection.  This is meant as a last
        resort when ssh commands fail and we don't understand why.
        """
        logging.debug('Restarting master ssh connection')
        self._master_ssh.close()
        self._master_ssh.maybe_start(timeout=30)


    def start_master_ssh(self, timeout=5):
        """
        Called whenever a slave SSH connection needs to be initiated (e.g., by
        run, rsync, scp). If master SSH support is enabled and a master SSH
        connection is not active already, start a new one in the background.
        Also, cleanup any zombie master SSH connections (e.g., dead due to
        reboot).

        timeout: timeout in seconds (default 5) to wait for master ssh
                 connection to be established. If timeout is reached, a
                 warning message is logged, but no other action is taken.
        """
        if not enable_master_ssh:
            return
        self._master_ssh.maybe_start(timeout=timeout)


    def clear_known_hosts(self):
        """Clears out the temporary ssh known_hosts file.

        This is useful if the test SSHes to the machine, then reinstalls it,
        then SSHes to it again.  It can be called after the reinstall to
        reduce the spam in the logs.
        """
        logging.info("Clearing known hosts for host '%s', file '%s'.",
                     self.host_port, self.known_hosts_file)
        # Clear out the file by opening it for writing and then closing.
        fh = open(self.known_hosts_file, "w")
        fh.close()


    def collect_logs(self, remote_src_dir, local_dest_dir, ignore_errors=True):
        """Copy log directories from a host to a local directory.

        @param remote_src_dir: A destination directory on the host.
        @param local_dest_dir: A path to a local destination directory.
            If it doesn't exist it will be created.
        @param ignore_errors: If True, ignore exceptions.

        @raises OSError: If there were problems creating the local_dest_dir and
            ignore_errors is False.
        @raises AutoservRunError, AutotestRunError: If something goes wrong
            while copying the directories and ignore_errors is False.
        """
        if not self.check_cached_up_status():
            logging.warning('Host %s did not answer to ping, skip collecting '
                            'logs.', self.host_port)
            return

        locally_created_dest = False
        if (not os.path.exists(local_dest_dir)
                or not os.path.isdir(local_dest_dir)):
            try:
                os.makedirs(local_dest_dir)
                locally_created_dest = True
            except OSError as e:
                logging.warning('Unable to collect logs from host '
                                '%s: %s', self.host_port, e)
                if not ignore_errors:
                    raise
                return

        # Build test result directory summary
        try:
            result_tools_runner.run_on_client(self, remote_src_dir)
        except (error.AutotestRunError, error.AutoservRunError,
                error.AutoservSSHTimeout):
            logging.exception(
                    'Non-critical failure: Failed to collect and throttle '
                    'results at %s from host %s', remote_src_dir,
                    self.host_port)

        try:
            self.get_file(remote_src_dir, local_dest_dir, safe_symlinks=True)
        except (error.AutotestRunError, error.AutoservRunError,
                error.AutoservSSHTimeout) as e:
            logging.warning('Collection of %s to local dir %s from host %s '
                            'failed: %s', remote_src_dir, local_dest_dir,
                            self.host_port, e)
            if locally_created_dest:
                shutil.rmtree(local_dest_dir, ignore_errors=ignore_errors)
            if not ignore_errors:
                raise

        # Clean up directory summary file on the client side.
        try:
            result_tools_runner.run_on_client(self, remote_src_dir,
                                              cleanup_only=True)
        except (error.AutotestRunError, error.AutoservRunError,
                error.AutoservSSHTimeout):
            logging.exception(
                    'Non-critical failure: Failed to cleanup result summary '
                    'files at %s in host %s', remote_src_dir, self.hostname)


    def create_ssh_tunnel(self, port, local_port):
        """Create an ssh tunnel from local_port to port.

        This is used to forward a port securely through a tunnel process from
        the server to the DUT for RPC server connection.

        @param port: remote port on the host.
        @param local_port: local forwarding port.

        @return: the tunnel process.
        """
        tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port)
        ssh_cmd = self.make_ssh_command(opts=tunnel_options, port=self.port)
        tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname)
        logging.debug('Full tunnel command: %s', tunnel_cmd)
        # Exec the ssh process directly here rather than using a shell.
        # Using a shell leaves a dangling ssh process, because we deliver
        # signals to the shell wrapping ssh, not the ssh process itself.
        args = shlex.split(tunnel_cmd)
        tunnel_proc = subprocess.Popen(args, close_fds=True)
        logging.debug('Started ssh tunnel, local = %d'
                      ' remote = %d, pid = %d',
                      local_port, port, tunnel_proc.pid)
        return tunnel_proc


    def disconnect_ssh_tunnel(self, tunnel_proc, port):
        """
        Disconnects a previously forwarded port from the server to the DUT for
        RPC server connection.

        @param tunnel_proc: a tunnel process returned from |create_ssh_tunnel|.
        @param port: remote port on the DUT, used in ADBHost.

        """
        if tunnel_proc.poll() is None:
            tunnel_proc.terminate()
            logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid)
        else:
            logging.debug('Tunnel pid %d terminated early, status %d',
                          tunnel_proc.pid, tunnel_proc.returncode)


    def get_os_type(self):
        """Returns the host OS descriptor (to be implemented in subclasses).

        @return A string describing the OS type.
        """
        raise NotImplementedError


    def check_cached_up_status(
            self, expiration_seconds=_DEFAULT_UP_STATUS_EXPIRATION_SECONDS):
        """Check if the DUT responded to ping in the past `expiration_seconds`.

        @param expiration_seconds: The number of seconds to keep the cached
            status of whether the DUT responded to ping.
        @return: True if the DUT has responded to ping during the past
            `expiration_seconds`.
        """
        # Refresh the up status if any of following conditions is true:
        # * cached status is never set
        # * cached status is False, so the method can check if the host is up
        #   again.
        # * If the cached status is older than `expiration_seconds`
        expire_time = time.time() - expiration_seconds
        if (self._cached_up_status_updated is None or
                not self._cached_up_status or
                self._cached_up_status_updated < expire_time):
            self._cached_up_status = self.is_up_fast()
            self._cached_up_status_updated = time.time()
        return self._cached_up_status