import os, time, socket, shutil, glob, logging, tempfile, re
import shlex
import subprocess

from autotest_lib.client.bin.result_tools import runner as result_tools_runner
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.client.common_lib.global_config import global_config
from autotest_lib.server import utils, autotest
from autotest_lib.server.hosts import host_info
from autotest_lib.server.hosts import remote
from autotest_lib.server.hosts import rpc_server_tracker
from autotest_lib.server.hosts import ssh_multiplex

# pylint: disable=C0111

# Shorthand for looking up entries in the global configuration.
get_value = global_config.get_config_value
# Whether to multiplex ssh connections through a master ssh process
# (OpenSSH ControlMaster); read once at import time from the AUTOSERV
# section of the global config.
enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh', type=bool,
                              default=False)

# Number of seconds to use the cached up status.
_DEFAULT_UP_STATUS_EXPIRATION_SECONDS = 300
# Default TCP port for ssh connections when the caller does not specify one.
_DEFAULT_SSH_PORT = 22

# Number of seconds to wait for the host to shut down in wait_down().
_DEFAULT_WAIT_DOWN_TIME_SECONDS = 120

# Number of seconds to wait for the host to boot up in wait_up().
_DEFAULT_WAIT_UP_TIME_SECONDS = 120

# Timeout in seconds for a single call of get_boot_id() in wait_down()
# and a single ssh ping in wait_up().
_DEFAULT_MAX_PING_TIMEOUT = 10

class AbstractSSHHost(remote.RemoteHost):
    """
    This class represents a generic implementation of most of the
    framework necessary for controlling a host via ssh. It implements
    almost all of the abstract Host methods, except for the core
    Host.run method.
    """
    VERSION_PREFIX = ''
    # Timeout for master ssh connection setup, in seconds.
44 DEFAULT_START_MASTER_SSH_TIMEOUT_S = 5 45 46 def _initialize(self, hostname, user="root", port=_DEFAULT_SSH_PORT, 47 password="", is_client_install_supported=True, 48 afe_host=None, host_info_store=None, connection_pool=None, 49 *args, **dargs): 50 super(AbstractSSHHost, self)._initialize(hostname=hostname, 51 *args, **dargs) 52 """ 53 @param hostname: The hostname of the host. 54 @param user: The username to use when ssh'ing into the host. 55 @param password: The password to use when ssh'ing into the host. 56 @param port: The port to use for ssh. 57 @param is_client_install_supported: Boolean to indicate if we can 58 install autotest on the host. 59 @param afe_host: The host object attained from the AFE (get_hosts). 60 @param host_info_store: Optional host_info.CachingHostInfoStore object 61 to obtain / update host information. 62 @param connection_pool: ssh_multiplex.ConnectionPool instance to share 63 the master ssh connection across control scripts. 64 """ 65 # IP address is retrieved only on demand. Otherwise the host 66 # initialization will fail for host is not online. 67 self._ip = None 68 self.user = user 69 self.port = port 70 self.password = password 71 self._is_client_install_supported = is_client_install_supported 72 self._use_rsync = None 73 self.known_hosts_file = tempfile.mkstemp()[1] 74 self._rpc_server_tracker = rpc_server_tracker.RpcServerTracker(self); 75 76 """ 77 Master SSH connection background job, socket temp directory and socket 78 control path option. If master-SSH is enabled, these fields will be 79 initialized by start_master_ssh when a new SSH connection is initiated. 
80 """ 81 self._connection_pool = connection_pool 82 if connection_pool: 83 self._master_ssh = connection_pool.get(hostname, user, port) 84 else: 85 self._master_ssh = ssh_multiplex.MasterSsh(hostname, user, port) 86 87 self._afe_host = afe_host or utils.EmptyAFEHost() 88 self.host_info_store = (host_info_store or 89 host_info.InMemoryHostInfoStore()) 90 91 # The cached status of whether the DUT responded to ping. 92 self._cached_up_status = None 93 # The timestamp when the value of _cached_up_status is set. 94 self._cached_up_status_updated = None 95 96 97 @property 98 def ip(self): 99 """@return IP address of the host. 100 """ 101 if not self._ip: 102 self._ip = socket.getaddrinfo(self.hostname, None)[0][4][0] 103 return self._ip 104 105 106 @property 107 def is_client_install_supported(self): 108 """" 109 Returns True if the host supports autotest client installs, False 110 otherwise. 111 """ 112 return self._is_client_install_supported 113 114 115 @property 116 def rpc_server_tracker(self): 117 """" 118 @return The RPC server tracker associated with this host. 119 """ 120 return self._rpc_server_tracker 121 122 123 @property 124 def is_default_port(self): 125 """Returns True if its port is default SSH port.""" 126 return self.port == _DEFAULT_SSH_PORT 127 128 @property 129 def host_port(self): 130 """Returns hostname if port is default. Otherwise, hostname:port. 131 """ 132 if self.is_default_port: 133 return self.hostname 134 else: 135 return '%s:%d' % (self.hostname, self.port) 136 137 138 # Though it doesn't use self here, it is not declared as staticmethod 139 # because its subclass may use self to access member variables. 
140 def make_ssh_command(self, user="root", port=_DEFAULT_SSH_PORT, opts='', 141 hosts_file='/dev/null', connect_timeout=30, 142 alive_interval=300, alive_count_max=3, 143 connection_attempts=1): 144 ssh_options = " ".join([ 145 opts, 146 self.make_ssh_options( 147 hosts_file=hosts_file, connect_timeout=connect_timeout, 148 alive_interval=alive_interval, alive_count_max=alive_count_max, 149 connection_attempts=connection_attempts)]) 150 return "/usr/bin/ssh -a -x %s -l %s -p %d" % (ssh_options, user, port) 151 152 153 @staticmethod 154 def make_ssh_options(hosts_file='/dev/null', connect_timeout=30, 155 alive_interval=300, alive_count_max=3, 156 connection_attempts=1): 157 """Composes SSH -o options.""" 158 assert isinstance(connect_timeout, (int, long)) 159 assert connect_timeout > 0 # can't disable the timeout 160 161 options = [("StrictHostKeyChecking", "no"), 162 ("UserKnownHostsFile", hosts_file), 163 ("BatchMode", "yes"), 164 ("ConnectTimeout", str(connect_timeout)), 165 ("ServerAliveInterval", str(alive_interval)), 166 ("ServerAliveCountMax", str(alive_count_max)), 167 ("ConnectionAttempts", str(connection_attempts))] 168 return " ".join("-o %s=%s" % kv for kv in options) 169 170 171 def use_rsync(self): 172 if self._use_rsync is not None: 173 return self._use_rsync 174 175 # Check if rsync is available on the remote host. If it's not, 176 # don't try to use it for any future file transfers. 177 self._use_rsync = self.check_rsync() 178 if not self._use_rsync: 179 logging.warning("rsync not available on remote host %s -- disabled", 180 self.host_port) 181 return self._use_rsync 182 183 184 def check_rsync(self): 185 """ 186 Check if rsync is available on the remote host. 
187 """ 188 try: 189 self.run("rsync --version", stdout_tee=None, stderr_tee=None) 190 except error.AutoservRunError: 191 return False 192 return True 193 194 195 def _encode_remote_paths(self, paths, escape=True, use_scp=False): 196 """ 197 Given a list of file paths, encodes it as a single remote path, in 198 the style used by rsync and scp. 199 escape: add \\ to protect special characters. 200 use_scp: encode for scp if true, rsync if false. 201 """ 202 if escape: 203 paths = [utils.scp_remote_escape(path) for path in paths] 204 205 remote = self.hostname 206 207 # rsync and scp require IPv6 brackets, even when there isn't any 208 # trailing port number (ssh doesn't support IPv6 brackets). 209 # In the Python >= 3.3 future, 'import ipaddress' will parse addresses. 210 if re.search(r':.*:', remote): 211 remote = '[%s]' % remote 212 213 if use_scp: 214 return '%s@%s:"%s"' % (self.user, remote, " ".join(paths)) 215 else: 216 return '%s@%s:%s' % ( 217 self.user, remote, 218 " :".join('"%s"' % p for p in paths)) 219 220 def _encode_local_paths(self, paths, escape=True): 221 """ 222 Given a list of file paths, encodes it as a single local path. 223 escape: add \\ to protect special characters. 
224 """ 225 if escape: 226 paths = [utils.sh_escape(path) for path in paths] 227 228 return " ".join('"%s"' % p for p in paths) 229 230 231 def rsync_options(self, delete_dest=False, preserve_symlinks=False, 232 safe_symlinks=False, excludes=None): 233 """Obtains rsync options for the remote.""" 234 ssh_cmd = self.make_ssh_command(user=self.user, port=self.port, 235 opts=self._master_ssh.ssh_option, 236 hosts_file=self.known_hosts_file) 237 if delete_dest: 238 delete_flag = "--delete" 239 else: 240 delete_flag = "" 241 if safe_symlinks: 242 symlink_flag = "-l --safe-links" 243 elif preserve_symlinks: 244 symlink_flag = "-l" 245 else: 246 symlink_flag = "-L" 247 exclude_args = '' 248 if excludes: 249 exclude_args = ' '.join( 250 ["--exclude '%s'" % exclude for exclude in excludes]) 251 return "%s %s --timeout=1800 --rsh='%s' -az --no-o --no-g %s" % ( 252 symlink_flag, delete_flag, ssh_cmd, exclude_args) 253 254 255 def _make_rsync_cmd(self, sources, dest, delete_dest, 256 preserve_symlinks, safe_symlinks, excludes=None): 257 """ 258 Given a string of source paths and a destination path, produces the 259 appropriate rsync command for copying them. Remote paths must be 260 pre-encoded. 261 """ 262 rsync_options = self.rsync_options( 263 delete_dest=delete_dest, preserve_symlinks=preserve_symlinks, 264 safe_symlinks=safe_symlinks, excludes=excludes) 265 return 'rsync %s %s "%s"' % (rsync_options, sources, dest) 266 267 268 def _make_ssh_cmd(self, cmd): 269 """ 270 Create a base ssh command string for the host which can be used 271 to run commands directly on the machine 272 """ 273 base_cmd = self.make_ssh_command(user=self.user, port=self.port, 274 opts=self._master_ssh.ssh_option, 275 hosts_file=self.known_hosts_file) 276 277 return '%s %s "%s"' % (base_cmd, self.hostname, utils.sh_escape(cmd)) 278 279 def _make_scp_cmd(self, sources, dest): 280 """ 281 Given a string of source paths and a destination path, produces the 282 appropriate scp command for encoding it. 
Remote paths must be 283 pre-encoded. 284 """ 285 command = ("scp -rq %s -o StrictHostKeyChecking=no " 286 "-o UserKnownHostsFile=%s -P %d %s '%s'") 287 return command % (self._master_ssh.ssh_option, self.known_hosts_file, 288 self.port, sources, dest) 289 290 291 def _make_rsync_compatible_globs(self, path, is_local): 292 """ 293 Given an rsync-style path, returns a list of globbed paths 294 that will hopefully provide equivalent behaviour for scp. Does not 295 support the full range of rsync pattern matching behaviour, only that 296 exposed in the get/send_file interface (trailing slashes). 297 298 The is_local param is flag indicating if the paths should be 299 interpreted as local or remote paths. 300 """ 301 302 # non-trailing slash paths should just work 303 if len(path) == 0 or path[-1] != "/": 304 return [path] 305 306 # make a function to test if a pattern matches any files 307 if is_local: 308 def glob_matches_files(path, pattern): 309 return len(glob.glob(path + pattern)) > 0 310 else: 311 def glob_matches_files(path, pattern): 312 result = self.run("ls \"%s\"%s" % (utils.sh_escape(path), 313 pattern), 314 stdout_tee=None, ignore_status=True) 315 return result.exit_status == 0 316 317 # take a set of globs that cover all files, and see which are needed 318 patterns = ["*", ".[!.]*"] 319 patterns = [p for p in patterns if glob_matches_files(path, p)] 320 321 # convert them into a set of paths suitable for the commandline 322 if is_local: 323 return ["\"%s\"%s" % (utils.sh_escape(path), pattern) 324 for pattern in patterns] 325 else: 326 return [utils.scp_remote_escape(path) + pattern 327 for pattern in patterns] 328 329 330 def _make_rsync_compatible_source(self, source, is_local): 331 """ 332 Applies the same logic as _make_rsync_compatible_globs, but 333 applies it to an entire list of sources, producing a new list of 334 sources, properly quoted. 
335 """ 336 return sum((self._make_rsync_compatible_globs(path, is_local) 337 for path in source), []) 338 339 340 def _set_umask_perms(self, dest): 341 """ 342 Given a destination file/dir (recursively) set the permissions on 343 all the files and directories to the max allowed by running umask. 344 """ 345 346 # now this looks strange but I haven't found a way in Python to _just_ 347 # get the umask, apparently the only option is to try to set it 348 umask = os.umask(0) 349 os.umask(umask) 350 351 max_privs = 0777 & ~umask 352 353 def set_file_privs(filename): 354 """Sets mode of |filename|. Assumes |filename| exists.""" 355 file_stat = os.stat(filename) 356 357 file_privs = max_privs 358 # if the original file permissions do not have at least one 359 # executable bit then do not set it anywhere 360 if not file_stat.st_mode & 0111: 361 file_privs &= ~0111 362 363 os.chmod(filename, file_privs) 364 365 # try a bottom-up walk so changes on directory permissions won't cut 366 # our access to the files/directories inside it 367 for root, dirs, files in os.walk(dest, topdown=False): 368 # when setting the privileges we emulate the chmod "X" behaviour 369 # that sets to execute only if it is a directory or any of the 370 # owner/group/other already has execute right 371 for dirname in dirs: 372 os.chmod(os.path.join(root, dirname), max_privs) 373 374 # Filter out broken symlinks as we go. 375 for filename in filter(os.path.exists, files): 376 set_file_privs(os.path.join(root, filename)) 377 378 379 # now set privs for the dest itself 380 if os.path.isdir(dest): 381 os.chmod(dest, max_privs) 382 else: 383 set_file_privs(dest) 384 385 386 def get_file(self, source, dest, delete_dest=False, preserve_perm=True, 387 preserve_symlinks=False, retry=True, safe_symlinks=False): 388 """ 389 Copy files from the remote host to a local path. 390 391 Directories will be copied recursively. 
392 If a source component is a directory with a trailing slash, 393 the content of the directory will be copied, otherwise, the 394 directory itself and its content will be copied. This 395 behavior is similar to that of the program 'rsync'. 396 397 Args: 398 source: either 399 1) a single file or directory, as a string 400 2) a list of one or more (possibly mixed) 401 files or directories 402 dest: a file or a directory (if source contains a 403 directory or more than one element, you must 404 supply a directory dest) 405 delete_dest: if this is true, the command will also clear 406 out any old files at dest that are not in the 407 source 408 preserve_perm: tells get_file() to try to preserve the sources 409 permissions on files and dirs 410 preserve_symlinks: try to preserve symlinks instead of 411 transforming them into files/dirs on copy 412 safe_symlinks: same as preserve_symlinks, but discard links 413 that may point outside the copied tree 414 Raises: 415 AutoservRunError: the scp command failed 416 """ 417 logging.debug('get_file. source: %s, dest: %s, delete_dest: %s,' 418 'preserve_perm: %s, preserve_symlinks:%s', source, dest, 419 delete_dest, preserve_perm, preserve_symlinks) 420 421 # Start a master SSH connection if necessary. 422 self.start_master_ssh() 423 424 if isinstance(source, basestring): 425 source = [source] 426 dest = os.path.abspath(dest) 427 428 # If rsync is disabled or fails, try scp. 
429 try_scp = True 430 if self.use_rsync(): 431 logging.debug('Using Rsync.') 432 try: 433 remote_source = self._encode_remote_paths(source) 434 local_dest = utils.sh_escape(dest) 435 rsync = self._make_rsync_cmd(remote_source, local_dest, 436 delete_dest, preserve_symlinks, 437 safe_symlinks) 438 utils.run(rsync) 439 try_scp = False 440 except error.CmdError, e: 441 # retry on rsync exit values which may be caused by transient 442 # network problems: 443 # 444 # rc 10: Error in socket I/O 445 # rc 12: Error in rsync protocol data stream 446 # rc 23: Partial transfer due to error 447 # rc 255: Ssh error 448 # 449 # Note that rc 23 includes dangling symlinks. In this case 450 # retrying is useless, but not very damaging since rsync checks 451 # for those before starting the transfer (scp does not). 452 status = e.result_obj.exit_status 453 if status in [10, 12, 23, 255] and retry: 454 logging.warning('rsync status %d, retrying', status) 455 self.get_file(source, dest, delete_dest, preserve_perm, 456 preserve_symlinks, retry=False) 457 # The nested get_file() does all that's needed. 
458 return 459 else: 460 logging.warning("trying scp, rsync failed: %s (%d)", 461 e, status) 462 463 if try_scp: 464 logging.debug('Trying scp.') 465 # scp has no equivalent to --delete, just drop the entire dest dir 466 if delete_dest and os.path.isdir(dest): 467 shutil.rmtree(dest) 468 os.mkdir(dest) 469 470 remote_source = self._make_rsync_compatible_source(source, False) 471 if remote_source: 472 # _make_rsync_compatible_source() already did the escaping 473 remote_source = self._encode_remote_paths( 474 remote_source, escape=False, use_scp=True) 475 local_dest = utils.sh_escape(dest) 476 scp = self._make_scp_cmd(remote_source, local_dest) 477 try: 478 utils.run(scp) 479 except error.CmdError, e: 480 logging.debug('scp failed: %s', e) 481 raise error.AutoservRunError(e.args[0], e.args[1]) 482 483 if not preserve_perm: 484 # we have no way to tell scp to not try to preserve the 485 # permissions so set them after copy instead. 486 # for rsync we could use "--no-p --chmod=ugo=rwX" but those 487 # options are only in very recent rsync versions 488 self._set_umask_perms(dest) 489 490 491 def send_file(self, source, dest, delete_dest=False, 492 preserve_symlinks=False, excludes=None): 493 """ 494 Copy files from a local path to the remote host. 495 496 Directories will be copied recursively. 497 If a source component is a directory with a trailing slash, 498 the content of the directory will be copied, otherwise, the 499 directory itself and its content will be copied. This 500 behavior is similar to that of the program 'rsync'. 
501 502 Args: 503 source: either 504 1) a single file or directory, as a string 505 2) a list of one or more (possibly mixed) 506 files or directories 507 dest: a file or a directory (if source contains a 508 directory or more than one element, you must 509 supply a directory dest) 510 delete_dest: if this is true, the command will also clear 511 out any old files at dest that are not in the 512 source 513 preserve_symlinks: controls if symlinks on the source will be 514 copied as such on the destination or transformed into the 515 referenced file/directory 516 excludes: A list of file pattern that matches files not to be 517 sent. `send_file` will fail if exclude is set, since 518 local copy does not support --exclude, e.g., when 519 using scp to copy file. 520 521 Raises: 522 AutoservRunError: the scp command failed 523 """ 524 logging.debug('send_file. source: %s, dest: %s, delete_dest: %s,' 525 'preserve_symlinks:%s', source, dest, 526 delete_dest, preserve_symlinks) 527 # Start a master SSH connection if necessary. 528 self.start_master_ssh() 529 530 if isinstance(source, basestring): 531 source = [source] 532 533 local_sources = self._encode_local_paths(source) 534 if not local_sources: 535 raise error.TestError('source |%s| yielded an empty string' % ( 536 source)) 537 if local_sources.find('\x00') != -1: 538 raise error.TestError('one or more sources include NUL char') 539 540 # If rsync is disabled or fails, try scp. 
541 try_scp = True 542 if self.use_rsync(): 543 logging.debug('Using Rsync.') 544 remote_dest = self._encode_remote_paths([dest]) 545 try: 546 rsync = self._make_rsync_cmd(local_sources, remote_dest, 547 delete_dest, preserve_symlinks, 548 False, excludes=excludes) 549 utils.run(rsync) 550 try_scp = False 551 except error.CmdError, e: 552 logging.warning("trying scp, rsync failed: %s", e) 553 554 if try_scp: 555 logging.debug('Trying scp.') 556 if excludes: 557 raise error.AutotestHostRunError( 558 '--exclude is not supported in scp, try to use rsync. ' 559 'excludes: %s' % ','.join(excludes), None) 560 # scp has no equivalent to --delete, just drop the entire dest dir 561 if delete_dest: 562 is_dir = self.run("ls -d %s/" % dest, 563 ignore_status=True).exit_status == 0 564 if is_dir: 565 cmd = "rm -rf %s && mkdir %s" 566 cmd %= (dest, dest) 567 self.run(cmd) 568 569 remote_dest = self._encode_remote_paths([dest], use_scp=True) 570 local_sources = self._make_rsync_compatible_source(source, True) 571 if local_sources: 572 sources = self._encode_local_paths(local_sources, escape=False) 573 scp = self._make_scp_cmd(sources, remote_dest) 574 try: 575 utils.run(scp) 576 except error.CmdError, e: 577 logging.debug('scp failed: %s', e) 578 raise error.AutoservRunError(e.args[0], e.args[1]) 579 else: 580 logging.debug('skipping scp for empty source list') 581 582 583 def verify_ssh_user_access(self): 584 """Verify ssh access to this host. 585 586 @returns False if ssh_ping fails due to Permissions error, True 587 otherwise. 588 """ 589 try: 590 self.ssh_ping() 591 except (error.AutoservSshPermissionDeniedError, 592 error.AutoservSshPingHostError): 593 return False 594 return True 595 596 597 def ssh_ping(self, timeout=60, connect_timeout=None, base_cmd='true'): 598 """ 599 Pings remote host via ssh. 600 601 @param timeout: Command execution timeout in seconds. 602 Defaults to 60 seconds. 603 @param connect_timeout: ssh connection timeout in seconds. 
604 @param base_cmd: The base command to run with the ssh ping. 605 Defaults to true. 606 @raise AutoservSSHTimeout: If the ssh ping times out. 607 @raise AutoservSshPermissionDeniedError: If ssh ping fails due to 608 permissions. 609 @raise AutoservSshPingHostError: For other AutoservRunErrors. 610 """ 611 ctimeout = min(timeout, connect_timeout or timeout) 612 try: 613 self.run(base_cmd, timeout=timeout, connect_timeout=ctimeout, 614 ssh_failure_retry_ok=True) 615 except error.AutoservSSHTimeout: 616 msg = "Host (ssh) verify timed out (timeout = %d)" % timeout 617 raise error.AutoservSSHTimeout(msg) 618 except error.AutoservSshPermissionDeniedError: 619 #let AutoservSshPermissionDeniedError be visible to the callers 620 raise 621 except error.AutoservRunError, e: 622 # convert the generic AutoservRunError into something more 623 # specific for this context 624 raise error.AutoservSshPingHostError(e.description + '\n' + 625 repr(e.result_obj)) 626 627 628 def is_up(self, timeout=60, connect_timeout=None, base_cmd='true'): 629 """ 630 Check if the remote host is up by ssh-ing and running a base command. 631 632 @param timeout: command execution timeout in seconds. 633 @param connect_timeout: ssh connection timeout in seconds. 634 @param base_cmd: a base command to run with ssh. The default is 'true'. 635 @returns True if the remote host is up before the timeout expires, 636 False otherwise. 637 """ 638 try: 639 self.ssh_ping(timeout=timeout, 640 connect_timeout=connect_timeout, 641 base_cmd=base_cmd) 642 except error.AutoservError: 643 return False 644 else: 645 return True 646 647 648 def is_up_fast(self): 649 """Return True if the host can be pinged.""" 650 ping_config = ping_runner.PingConfig( 651 self.hostname, count=3, ignore_result=True, ignore_status=True) 652 return ping_runner.PingRunner().ping(ping_config).received > 0 653 654 655 def wait_up(self, timeout=_DEFAULT_WAIT_UP_TIME_SECONDS): 656 """ 657 Wait until the remote host is up or the timeout expires. 
658 659 In fact, it will wait until an ssh connection to the remote 660 host can be established, and getty is running. 661 662 @param timeout time limit in seconds before returning even 663 if the host is not up. 664 665 @returns True if the host was found to be up before the timeout expires, 666 False otherwise 667 """ 668 current_time = int(time.time()) 669 end_time = current_time + timeout 670 671 autoserv_error_logged = False 672 while current_time < end_time: 673 ping_timeout = min(_DEFAULT_MAX_PING_TIMEOUT, 674 end_time - current_time) 675 if self.is_up(timeout=ping_timeout, connect_timeout=ping_timeout): 676 try: 677 if self.are_wait_up_processes_up(): 678 logging.debug('Host %s is now up', self.host_port) 679 return True 680 except error.AutoservError as e: 681 if not autoserv_error_logged: 682 logging.debug('Ignoring failure to reach %s: %s %s', 683 self.host_port, e, 684 '(and further similar failures)') 685 autoserv_error_logged = True 686 time.sleep(1) 687 current_time = int(time.time()) 688 689 logging.debug('Host %s is still down after waiting %d seconds', 690 self.host_port, int(timeout + time.time() - end_time)) 691 return False 692 693 694 def wait_down(self, timeout=_DEFAULT_WAIT_DOWN_TIME_SECONDS, 695 warning_timer=None, old_boot_id=None, 696 max_ping_timeout=_DEFAULT_MAX_PING_TIMEOUT): 697 """ 698 Wait until the remote host is down or the timeout expires. 699 700 If old_boot_id is provided, waits until either the machine is 701 unpingable or self.get_boot_id() returns a value different from 702 old_boot_id. If the boot_id value has changed then the function 703 returns True under the assumption that the machine has shut down 704 and has now already come back up. 705 706 If old_boot_id is None then until the machine becomes unreachable the 707 method assumes the machine has not yet shut down. 708 709 @param timeout Time limit in seconds before returning even if the host 710 is still up. 
711 @param warning_timer Time limit in seconds that will generate a warning 712 if the host is not down yet. Can be None for no warning. 713 @param old_boot_id A string containing the result of self.get_boot_id() 714 prior to the host being told to shut down. Can be None if this is 715 not available. 716 @param max_ping_timeout Maximum timeout in seconds for each 717 self.get_boot_id() call. If this timeout is hit, it is assumed that 718 the host went down and became unreachable. 719 720 @returns True if the host was found to be down (max_ping_timeout timeout 721 expired or boot_id changed if provided) and False if timeout 722 expired. 723 """ 724 #TODO: there is currently no way to distinguish between knowing 725 #TODO: boot_id was unsupported and not knowing the boot_id. 726 current_time = int(time.time()) 727 end_time = current_time + timeout 728 729 if warning_timer: 730 warn_time = current_time + warning_timer 731 732 if old_boot_id is not None: 733 logging.debug('Host %s pre-shutdown boot_id is %s', 734 self.host_port, old_boot_id) 735 736 # Impose semi real-time deadline constraints, since some clients 737 # (eg: watchdog timer tests) expect strict checking of time elapsed. 738 # Each iteration of this loop is treated as though it atomically 739 # completes within current_time, this is needed because if we used 740 # inline time.time() calls instead then the following could happen: 741 # 742 # while time.time() < end_time: [23 < 30] 743 # some code. [takes 10 secs] 744 # try: 745 # new_boot_id = self.get_boot_id(timeout=end_time - time.time()) 746 # [30 - 33] 747 # The last step will lead to a return True, when in fact the machine 748 # went down at 32 seconds (>30). Hence we need to pass get_boot_id 749 # the same time that allowed us into that iteration of the loop. 
750 while current_time < end_time: 751 ping_timeout = min(end_time - current_time, max_ping_timeout) 752 try: 753 new_boot_id = self.get_boot_id(timeout=ping_timeout) 754 except error.AutoservError: 755 logging.debug('Host %s is now unreachable over ssh, is down', 756 self.host_port) 757 return True 758 else: 759 # if the machine is up but the boot_id value has changed from 760 # old boot id, then we can assume the machine has gone down 761 # and then already come back up 762 if old_boot_id is not None and old_boot_id != new_boot_id: 763 logging.debug('Host %s now has boot_id %s and so must ' 764 'have rebooted', self.host_port, new_boot_id) 765 return True 766 767 if warning_timer and current_time > warn_time: 768 self.record("INFO", None, "shutdown", 769 "Shutdown took longer than %ds" % warning_timer) 770 # Print the warning only once. 771 warning_timer = None 772 # If a machine is stuck switching runlevels 773 # This may cause the machine to reboot. 774 self.run('kill -HUP 1', ignore_status=True) 775 776 time.sleep(1) 777 current_time = int(time.time()) 778 779 return False 780 781 782 # tunable constants for the verify & repair code 783 AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER", 784 "gb_diskspace_required", 785 type=float, 786 default=20.0) 787 788 789 def verify_connectivity(self): 790 super(AbstractSSHHost, self).verify_connectivity() 791 792 logging.info('Pinging host ' + self.host_port) 793 self.ssh_ping() 794 logging.info("Host (ssh) %s is alive", self.host_port) 795 796 if self.is_shutting_down(): 797 raise error.AutoservHostIsShuttingDownError("Host is shutting down") 798 799 800 def verify_software(self): 801 super(AbstractSSHHost, self).verify_software() 802 try: 803 self.check_diskspace(autotest.Autotest.get_install_dir(self), 804 self.AUTOTEST_GB_DISKSPACE_REQUIRED) 805 except error.AutoservDiskFullHostError: 806 # only want to raise if it's a space issue 807 raise 808 except (error.AutoservHostError, autotest.AutodirNotFoundError): 809 
logging.exception('autodir space check exception, this is probably ' 810 'safe to ignore\n') 811 812 813 def close(self): 814 super(AbstractSSHHost, self).close() 815 self.rpc_server_tracker.disconnect_all() 816 if not self._connection_pool: 817 self._master_ssh.close() 818 if os.path.exists(self.known_hosts_file): 819 os.remove(self.known_hosts_file) 820 821 822 def restart_master_ssh(self): 823 """ 824 Stop and restart the ssh master connection. This is meant as a last 825 resort when ssh commands fail and we don't understand why. 826 """ 827 logging.debug('Restarting master ssh connection') 828 self._master_ssh.close() 829 self._master_ssh.maybe_start(timeout=30) 830 831 832 833 def start_master_ssh(self, timeout=DEFAULT_START_MASTER_SSH_TIMEOUT_S): 834 """ 835 Called whenever a slave SSH connection needs to be initiated (e.g., by 836 run, rsync, scp). If master SSH support is enabled and a master SSH 837 connection is not active already, start a new one in the background. 838 Also, cleanup any zombie master SSH connections (e.g., dead due to 839 reboot). 840 841 timeout: timeout in seconds (default 5) to wait for master ssh 842 connection to be established. If timeout is reached, a 843 warning message is logged, but no other action is taken. 844 """ 845 if not enable_master_ssh: 846 return 847 self._master_ssh.maybe_start(timeout=timeout) 848 849 850 def clear_known_hosts(self): 851 """Clears out the temporary ssh known_hosts file. 852 853 This is useful if the test SSHes to the machine, then reinstalls it, 854 then SSHes to it again. It can be called after the reinstall to 855 reduce the spam in the logs. 856 """ 857 logging.info("Clearing known hosts for host '%s', file '%s'.", 858 self.host_port, self.known_hosts_file) 859 # Clear out the file by opening it for writing and then closing. 
860 fh = open(self.known_hosts_file, "w") 861 fh.close() 862 863 864 def collect_logs(self, remote_src_dir, local_dest_dir, ignore_errors=True): 865 """Copy log directories from a host to a local directory. 866 867 @param remote_src_dir: A destination directory on the host. 868 @param local_dest_dir: A path to a local destination directory. 869 If it doesn't exist it will be created. 870 @param ignore_errors: If True, ignore exceptions. 871 872 @raises OSError: If there were problems creating the local_dest_dir and 873 ignore_errors is False. 874 @raises AutoservRunError, AutotestRunError: If something goes wrong 875 while copying the directories and ignore_errors is False. 876 """ 877 if not self.check_cached_up_status(): 878 logging.warning('Host %s did not answer to ping, skip collecting ' 879 'logs.', self.host_port) 880 return 881 882 locally_created_dest = False 883 if (not os.path.exists(local_dest_dir) 884 or not os.path.isdir(local_dest_dir)): 885 try: 886 os.makedirs(local_dest_dir) 887 locally_created_dest = True 888 except OSError as e: 889 logging.warning('Unable to collect logs from host ' 890 '%s: %s', self.host_port, e) 891 if not ignore_errors: 892 raise 893 return 894 895 # Build test result directory summary 896 try: 897 result_tools_runner.run_on_client(self, remote_src_dir) 898 except (error.AutotestRunError, error.AutoservRunError, 899 error.AutoservSSHTimeout) as e: 900 logging.exception( 901 'Non-critical failure: Failed to collect and throttle ' 902 'results at %s from host %s', remote_src_dir, 903 self.host_port) 904 905 try: 906 self.get_file(remote_src_dir, local_dest_dir, safe_symlinks=True) 907 except (error.AutotestRunError, error.AutoservRunError, 908 error.AutoservSSHTimeout) as e: 909 logging.warning('Collection of %s to local dir %s from host %s ' 910 'failed: %s', remote_src_dir, local_dest_dir, 911 self.host_port, e) 912 if locally_created_dest: 913 shutil.rmtree(local_dest_dir, ignore_errors=ignore_errors) 914 if not 
ignore_errors: 915 raise 916 917 # Clean up directory summary file on the client side. 918 try: 919 result_tools_runner.run_on_client(self, remote_src_dir, 920 cleanup_only=True) 921 except (error.AutotestRunError, error.AutoservRunError, 922 error.AutoservSSHTimeout) as e: 923 logging.exception( 924 'Non-critical failure: Failed to cleanup result summary ' 925 'files at %s in host %s', remote_src_dir, self.hostname) 926 927 928 def create_ssh_tunnel(self, port, local_port): 929 """Create an ssh tunnel from local_port to port. 930 931 This is used to forward a port securely through a tunnel process from 932 the server to the DUT for RPC server connection. 933 934 @param port: remote port on the host. 935 @param local_port: local forwarding port. 936 937 @return: the tunnel process. 938 """ 939 tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port) 940 ssh_cmd = self.make_ssh_command(opts=tunnel_options, port=self.port) 941 tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname) 942 logging.debug('Full tunnel command: %s', tunnel_cmd) 943 # Exec the ssh process directly here rather than using a shell. 944 # Using a shell leaves a dangling ssh process, because we deliver 945 # signals to the shell wrapping ssh, not the ssh process itself. 946 args = shlex.split(tunnel_cmd) 947 tunnel_proc = subprocess.Popen(args, close_fds=True) 948 logging.debug('Started ssh tunnel, local = %d' 949 ' remote = %d, pid = %d', 950 local_port, port, tunnel_proc.pid) 951 return tunnel_proc 952 953 954 def disconnect_ssh_tunnel(self, tunnel_proc, port): 955 """ 956 Disconnects a previously forwarded port from the server to the DUT for 957 RPC server connection. 958 959 @param tunnel_proc: a tunnel process returned from |create_ssh_tunnel|. 960 @param port: remote port on the DUT, used in ADBHost. 
961 962 """ 963 if tunnel_proc.poll() is None: 964 tunnel_proc.terminate() 965 logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid) 966 else: 967 logging.debug('Tunnel pid %d terminated early, status %d', 968 tunnel_proc.pid, tunnel_proc.returncode) 969 970 971 def get_os_type(self): 972 """Returns the host OS descriptor (to be implemented in subclasses). 973 974 @return A string describing the OS type. 975 """ 976 raise NotImplementedError 977 978 979 def check_cached_up_status( 980 self, expiration_seconds=_DEFAULT_UP_STATUS_EXPIRATION_SECONDS): 981 """Check if the DUT responded to ping in the past `expiration_seconds`. 982 983 @param expiration_seconds: The number of seconds to keep the cached 984 status of whether the DUT responded to ping. 985 @return: True if the DUT has responded to ping during the past 986 `expiration_seconds`. 987 """ 988 # Refresh the up status if any of following conditions is true: 989 # * cached status is never set 990 # * cached status is False, so the method can check if the host is up 991 # again. 992 # * If the cached status is older than `expiration_seconds` 993 expire_time = time.time() - expiration_seconds 994 if (self._cached_up_status_updated is None or 995 not self._cached_up_status or 996 self._cached_up_status_updated < expire_time): 997 self._cached_up_status = self.is_up_fast() 998 self._cached_up_status_updated = time.time() 999 return self._cached_up_status 1000