# Lint as: python2, python3
# Copyright (c) 2008 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import glob
import logging
import os
import re
import shlex
import shutil
import socket
import subprocess
import tempfile
import time

from autotest_lib.client.bin.result_tools import runner as result_tools_runner
from autotest_lib.client.common_lib import error
# NOTE(review): this `utils` binding is immediately shadowed by the
# server-side `utils` imported below; kept for its import side effects only.
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib.cros.network import ping_runner
from autotest_lib.client.common_lib.global_config import global_config
from autotest_lib.server import utils, autotest
from autotest_lib.server.hosts import host_info
from autotest_lib.server.hosts import remote
from autotest_lib.server.hosts import rpc_server_tracker
from autotest_lib.server.hosts import ssh_multiplex
import six
from six.moves import filter

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock

# pylint: disable=C0111

get_value = global_config.get_config_value
enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh', type=bool,
                              default=False)

# Number of seconds to use the cached up status.
_DEFAULT_UP_STATUS_EXPIRATION_SECONDS = 300
_DEFAULT_SSH_PORT = 22

# Number of seconds to wait for the host to shut down in wait_down().
_DEFAULT_WAIT_DOWN_TIME_SECONDS = 120

# Number of seconds to wait for the host to boot up in wait_up().
_DEFAULT_WAIT_UP_TIME_SECONDS = 120

# Timeout in seconds for a single call of get_boot_id() in wait_down()
# and a single ssh ping in wait_up().
_DEFAULT_MAX_PING_TIMEOUT = 10


class AbstractSSHHost(remote.RemoteHost):
    """
    This class represents a generic implementation of most of the
    framework necessary for controlling a host via ssh. It implements
    almost all of the abstract Host methods, except for the core
    Host.run method.
    """
    VERSION_PREFIX = ''
    # Timeout for master ssh connection setup, in seconds.
    DEFAULT_START_MASTER_SSH_TIMEOUT_S = 5

    def _initialize(self, hostname, user="root", port=_DEFAULT_SSH_PORT,
                    password="", is_client_install_supported=True,
                    afe_host=None, host_info_store=None, connection_pool=None,
                    *args, **dargs):
        """Initialize the ssh host.

        @param hostname: The hostname of the host.
        @param user: The username to use when ssh'ing into the host.
        @param password: The password to use when ssh'ing into the host.
        @param port: The port to use for ssh.
        @param is_client_install_supported: Boolean to indicate if we can
                install autotest on the host.
        @param afe_host: The host object attained from the AFE (get_hosts).
        @param host_info_store: Optional host_info.CachingHostInfoStore object
                to obtain / update host information.
        @param connection_pool: ssh_multiplex.ConnectionPool instance to share
                the master ssh connection across control scripts.
        """
        super(AbstractSSHHost, self)._initialize(hostname=hostname,
                                                 *args, **dargs)
        self._track_class_usage()
        # IP address is retrieved only on demand. Otherwise the host
        # initialization will fail for host is not online.
        self._ip = None
        self.user = user
        self.port = port
        self.password = password
        self._is_client_install_supported = is_client_install_supported
        self._use_rsync = None
        self.known_hosts_file = tempfile.mkstemp()[1]
        self._rpc_server_tracker = rpc_server_tracker.RpcServerTracker(self)

        # Master SSH connection background job, socket temp directory and
        # socket control path option. If master-SSH is enabled, these fields
        # will be initialized by start_master_ssh when a new SSH connection
        # is initiated.
        self._connection_pool = connection_pool
        if connection_pool:
            self._master_ssh = connection_pool.get(hostname, user, port)
        else:
            self._master_ssh = ssh_multiplex.MasterSsh(hostname, user, port)

        self._afe_host = afe_host or utils.EmptyAFEHost()
        self.host_info_store = (host_info_store or
                                host_info.InMemoryHostInfoStore())

        # The cached status of whether the DUT responded to ping.
        self._cached_up_status = None
        # The timestamp when the value of _cached_up_status is set.
        self._cached_up_status_updated = None


    @property
    def ip(self):
        """@return IP address of the host.
        """
        if not self._ip:
            self._ip = socket.getaddrinfo(self.hostname, None)[0][4][0]
        return self._ip


    @property
    def is_client_install_supported(self):
        """
        Returns True if the host supports autotest client installs, False
        otherwise.
        """
        return self._is_client_install_supported


    @property
    def rpc_server_tracker(self):
        """
        @return The RPC server tracker associated with this host.
        """
        return self._rpc_server_tracker


    @property
    def is_default_port(self):
        """Returns True if its port is default SSH port."""
        return self.port == _DEFAULT_SSH_PORT

    @property
    def host_port(self):
        """Returns hostname if port is default. Otherwise, hostname:port.
        """
        if self.is_default_port:
            return self.hostname
        else:
            return '%s:%d' % (self.hostname, self.port)


    # Though it doesn't use self here, it is not declared as staticmethod
    # because its subclass may use self to access member variables.
158 def make_ssh_command(self, user="root", port=_DEFAULT_SSH_PORT, opts='', 159 hosts_file='/dev/null', connect_timeout=30, 160 alive_interval=300, alive_count_max=3, 161 connection_attempts=1): 162 ssh_options = " ".join([ 163 opts, 164 self.make_ssh_options( 165 hosts_file=hosts_file, connect_timeout=connect_timeout, 166 alive_interval=alive_interval, alive_count_max=alive_count_max, 167 connection_attempts=connection_attempts)]) 168 return "/usr/bin/ssh -a -x %s -l %s -p %d" % (ssh_options, user, port) 169 170 171 @staticmethod 172 def make_ssh_options(hosts_file='/dev/null', connect_timeout=30, 173 alive_interval=300, alive_count_max=3, 174 connection_attempts=1): 175 """Composes SSH -o options.""" 176 assert isinstance(connect_timeout, six.integer_types) 177 assert connect_timeout > 0 # can't disable the timeout 178 179 options = [("StrictHostKeyChecking", "no"), 180 ("UserKnownHostsFile", hosts_file), 181 ("BatchMode", "yes"), 182 ("ConnectTimeout", str(connect_timeout)), 183 ("ServerAliveInterval", str(alive_interval)), 184 ("ServerAliveCountMax", str(alive_count_max)), 185 ("ConnectionAttempts", str(connection_attempts))] 186 return " ".join("-o %s=%s" % kv for kv in options) 187 188 189 def use_rsync(self): 190 if self._use_rsync is not None: 191 return self._use_rsync 192 193 # Check if rsync is available on the remote host. If it's not, 194 # don't try to use it for any future file transfers. 195 self._use_rsync = self.check_rsync() 196 if not self._use_rsync: 197 logging.warning("rsync not available on remote host %s -- disabled", 198 self.host_port) 199 return self._use_rsync 200 201 202 def check_rsync(self): 203 """ 204 Check if rsync is available on the remote host. 
205 """ 206 try: 207 self.run("rsync --version", stdout_tee=None, stderr_tee=None) 208 except error.AutoservRunError: 209 return False 210 return True 211 212 213 def _encode_remote_paths(self, paths, escape=True, use_scp=False): 214 """ 215 Given a list of file paths, encodes it as a single remote path, in 216 the style used by rsync and scp. 217 escape: add \\ to protect special characters. 218 use_scp: encode for scp if true, rsync if false. 219 """ 220 if escape: 221 paths = [utils.scp_remote_escape(path) for path in paths] 222 223 remote = self.hostname 224 225 # rsync and scp require IPv6 brackets, even when there isn't any 226 # trailing port number (ssh doesn't support IPv6 brackets). 227 # In the Python >= 3.3 future, 'import ipaddress' will parse addresses. 228 if re.search(r':.*:', remote): 229 remote = '[%s]' % remote 230 231 if use_scp: 232 return '%s@%s:"%s"' % (self.user, remote, " ".join(paths)) 233 else: 234 return '%s@%s:%s' % ( 235 self.user, remote, 236 " :".join('"%s"' % p for p in paths)) 237 238 def _encode_local_paths(self, paths, escape=True): 239 """ 240 Given a list of file paths, encodes it as a single local path. 241 escape: add \\ to protect special characters. 
242 """ 243 if escape: 244 paths = [utils.sh_escape(path) for path in paths] 245 246 return " ".join('"%s"' % p for p in paths) 247 248 249 def rsync_options(self, delete_dest=False, preserve_symlinks=False, 250 safe_symlinks=False, excludes=None): 251 """Obtains rsync options for the remote.""" 252 ssh_cmd = self.make_ssh_command(user=self.user, port=self.port, 253 opts=self._master_ssh.ssh_option, 254 hosts_file=self.known_hosts_file) 255 if delete_dest: 256 delete_flag = "--delete" 257 else: 258 delete_flag = "" 259 if safe_symlinks: 260 symlink_flag = "-l --safe-links" 261 elif preserve_symlinks: 262 symlink_flag = "-l" 263 else: 264 symlink_flag = "-L" 265 exclude_args = '' 266 if excludes: 267 exclude_args = ' '.join( 268 ["--exclude '%s'" % exclude for exclude in excludes]) 269 return "%s %s --timeout=1800 --rsh='%s' -az --no-o --no-g %s" % ( 270 symlink_flag, delete_flag, ssh_cmd, exclude_args) 271 272 273 def _make_rsync_cmd(self, sources, dest, delete_dest, 274 preserve_symlinks, safe_symlinks, excludes=None): 275 """ 276 Given a string of source paths and a destination path, produces the 277 appropriate rsync command for copying them. Remote paths must be 278 pre-encoded. 279 """ 280 rsync_options = self.rsync_options( 281 delete_dest=delete_dest, preserve_symlinks=preserve_symlinks, 282 safe_symlinks=safe_symlinks, excludes=excludes) 283 return 'rsync %s %s "%s"' % (rsync_options, sources, dest) 284 285 286 def _make_ssh_cmd(self, cmd): 287 """ 288 Create a base ssh command string for the host which can be used 289 to run commands directly on the machine 290 """ 291 base_cmd = self.make_ssh_command(user=self.user, port=self.port, 292 opts=self._master_ssh.ssh_option, 293 hosts_file=self.known_hosts_file) 294 295 return '%s %s "%s"' % (base_cmd, self.hostname, utils.sh_escape(cmd)) 296 297 def _make_scp_cmd(self, sources, dest): 298 """ 299 Given a string of source paths and a destination path, produces the 300 appropriate scp command for encoding it. 
Remote paths must be 301 pre-encoded. 302 """ 303 command = ("scp -rq %s -o StrictHostKeyChecking=no " 304 "-o UserKnownHostsFile=%s -P %d %s '%s'") 305 return command % (self._master_ssh.ssh_option, self.known_hosts_file, 306 self.port, sources, dest) 307 308 309 def _make_rsync_compatible_globs(self, path, is_local): 310 """ 311 Given an rsync-style path, returns a list of globbed paths 312 that will hopefully provide equivalent behaviour for scp. Does not 313 support the full range of rsync pattern matching behaviour, only that 314 exposed in the get/send_file interface (trailing slashes). 315 316 The is_local param is flag indicating if the paths should be 317 interpreted as local or remote paths. 318 """ 319 320 # non-trailing slash paths should just work 321 if len(path) == 0 or path[-1] != "/": 322 return [path] 323 324 # make a function to test if a pattern matches any files 325 if is_local: 326 def glob_matches_files(path, pattern): 327 return len(glob.glob(path + pattern)) > 0 328 else: 329 def glob_matches_files(path, pattern): 330 result = self.run("ls \"%s\"%s" % (utils.sh_escape(path), 331 pattern), 332 stdout_tee=None, ignore_status=True) 333 return result.exit_status == 0 334 335 # take a set of globs that cover all files, and see which are needed 336 patterns = ["*", ".[!.]*"] 337 patterns = [p for p in patterns if glob_matches_files(path, p)] 338 339 # convert them into a set of paths suitable for the commandline 340 if is_local: 341 return ["\"%s\"%s" % (utils.sh_escape(path), pattern) 342 for pattern in patterns] 343 else: 344 return [utils.scp_remote_escape(path) + pattern 345 for pattern in patterns] 346 347 348 def _make_rsync_compatible_source(self, source, is_local): 349 """ 350 Applies the same logic as _make_rsync_compatible_globs, but 351 applies it to an entire list of sources, producing a new list of 352 sources, properly quoted. 
353 """ 354 return sum((self._make_rsync_compatible_globs(path, is_local) 355 for path in source), []) 356 357 358 def _set_umask_perms(self, dest): 359 """ 360 Given a destination file/dir (recursively) set the permissions on 361 all the files and directories to the max allowed by running umask. 362 """ 363 364 # now this looks strange but I haven't found a way in Python to _just_ 365 # get the umask, apparently the only option is to try to set it 366 umask = os.umask(0) 367 os.umask(umask) 368 369 max_privs = 0o777 & ~umask 370 371 def set_file_privs(filename): 372 """Sets mode of |filename|. Assumes |filename| exists.""" 373 file_stat = os.stat(filename) 374 375 file_privs = max_privs 376 # if the original file permissions do not have at least one 377 # executable bit then do not set it anywhere 378 if not file_stat.st_mode & 0o111: 379 file_privs &= ~0o111 380 381 os.chmod(filename, file_privs) 382 383 # try a bottom-up walk so changes on directory permissions won't cut 384 # our access to the files/directories inside it 385 for root, dirs, files in os.walk(dest, topdown=False): 386 # when setting the privileges we emulate the chmod "X" behaviour 387 # that sets to execute only if it is a directory or any of the 388 # owner/group/other already has execute right 389 for dirname in dirs: 390 os.chmod(os.path.join(root, dirname), max_privs) 391 392 # Filter out broken symlinks as we go. 393 for filename in filter(os.path.exists, files): 394 set_file_privs(os.path.join(root, filename)) 395 396 397 # now set privs for the dest itself 398 if os.path.isdir(dest): 399 os.chmod(dest, max_privs) 400 else: 401 set_file_privs(dest) 402 403 404 def get_file(self, source, dest, delete_dest=False, preserve_perm=True, 405 preserve_symlinks=False, retry=True, safe_symlinks=False, 406 try_rsync=True): 407 """ 408 Copy files from the remote host to a local path. 409 410 Directories will be copied recursively. 
411 If a source component is a directory with a trailing slash, 412 the content of the directory will be copied, otherwise, the 413 directory itself and its content will be copied. This 414 behavior is similar to that of the program 'rsync'. 415 416 Args: 417 source: either 418 1) a single file or directory, as a string 419 2) a list of one or more (possibly mixed) 420 files or directories 421 dest: a file or a directory (if source contains a 422 directory or more than one element, you must 423 supply a directory dest) 424 delete_dest: if this is true, the command will also clear 425 out any old files at dest that are not in the 426 source 427 preserve_perm: tells get_file() to try to preserve the sources 428 permissions on files and dirs 429 preserve_symlinks: try to preserve symlinks instead of 430 transforming them into files/dirs on copy 431 safe_symlinks: same as preserve_symlinks, but discard links 432 that may point outside the copied tree 433 try_rsync: set to False to skip directly to using scp 434 Raises: 435 AutoservRunError: the scp command failed 436 """ 437 logging.debug('get_file. source: %s, dest: %s, delete_dest: %s,' 438 'preserve_perm: %s, preserve_symlinks:%s', source, dest, 439 delete_dest, preserve_perm, preserve_symlinks) 440 441 # Start a master SSH connection if necessary. 442 self.start_master_ssh() 443 444 if isinstance(source, six.string_types): 445 source = [source] 446 dest = os.path.abspath(dest) 447 448 # If rsync is disabled or fails, try scp. 
449 try_scp = True 450 if try_rsync and self.use_rsync(): 451 logging.debug('Using Rsync.') 452 try: 453 remote_source = self._encode_remote_paths(source) 454 local_dest = utils.sh_escape(dest) 455 rsync = self._make_rsync_cmd(remote_source, local_dest, 456 delete_dest, preserve_symlinks, 457 safe_symlinks) 458 utils.run(rsync) 459 try_scp = False 460 except error.CmdError as e: 461 # retry on rsync exit values which may be caused by transient 462 # network problems: 463 # 464 # rc 10: Error in socket I/O 465 # rc 12: Error in rsync protocol data stream 466 # rc 23: Partial transfer due to error 467 # rc 255: Ssh error 468 # 469 # Note that rc 23 includes dangling symlinks. In this case 470 # retrying is useless, but not very damaging since rsync checks 471 # for those before starting the transfer (scp does not). 472 status = e.result_obj.exit_status 473 if status in [10, 12, 23, 255] and retry: 474 logging.warning('rsync status %d, retrying', status) 475 self.get_file(source, dest, delete_dest, preserve_perm, 476 preserve_symlinks, retry=False) 477 # The nested get_file() does all that's needed. 
478 return 479 else: 480 logging.warning("trying scp, rsync failed: %s (%d)", 481 e, status) 482 483 if try_scp: 484 logging.debug('Trying scp.') 485 # scp has no equivalent to --delete, just drop the entire dest dir 486 if delete_dest and os.path.isdir(dest): 487 shutil.rmtree(dest) 488 os.mkdir(dest) 489 490 remote_source = self._make_rsync_compatible_source(source, False) 491 if remote_source: 492 # _make_rsync_compatible_source() already did the escaping 493 remote_source = self._encode_remote_paths( 494 remote_source, escape=False, use_scp=True) 495 local_dest = utils.sh_escape(dest) 496 scp = self._make_scp_cmd(remote_source, local_dest) 497 try: 498 utils.run(scp) 499 except error.CmdError as e: 500 logging.debug('scp failed: %s', e) 501 raise error.AutoservRunError(e.args[0], e.args[1]) 502 503 if not preserve_perm: 504 # we have no way to tell scp to not try to preserve the 505 # permissions so set them after copy instead. 506 # for rsync we could use "--no-p --chmod=ugo=rwX" but those 507 # options are only in very recent rsync versions 508 self._set_umask_perms(dest) 509 510 511 def send_file(self, source, dest, delete_dest=False, 512 preserve_symlinks=False, excludes=None): 513 """ 514 Copy files from a local path to the remote host. 515 516 Directories will be copied recursively. 517 If a source component is a directory with a trailing slash, 518 the content of the directory will be copied, otherwise, the 519 directory itself and its content will be copied. This 520 behavior is similar to that of the program 'rsync'. 
521 522 Args: 523 source: either 524 1) a single file or directory, as a string 525 2) a list of one or more (possibly mixed) 526 files or directories 527 dest: a file or a directory (if source contains a 528 directory or more than one element, you must 529 supply a directory dest) 530 delete_dest: if this is true, the command will also clear 531 out any old files at dest that are not in the 532 source 533 preserve_symlinks: controls if symlinks on the source will be 534 copied as such on the destination or transformed into the 535 referenced file/directory 536 excludes: A list of file pattern that matches files not to be 537 sent. `send_file` will fail if exclude is set, since 538 local copy does not support --exclude, e.g., when 539 using scp to copy file. 540 541 Raises: 542 AutoservRunError: the scp command failed 543 """ 544 logging.debug('send_file. source: %s, dest: %s, delete_dest: %s,' 545 'preserve_symlinks:%s', source, dest, 546 delete_dest, preserve_symlinks) 547 # Start a master SSH connection if necessary. 548 self.start_master_ssh() 549 550 if isinstance(source, six.string_types): 551 source = [source] 552 553 local_sources = self._encode_local_paths(source) 554 if not local_sources: 555 raise error.TestError('source |%s| yielded an empty string' % ( 556 source)) 557 if local_sources.find('\x00') != -1: 558 raise error.TestError('one or more sources include NUL char') 559 560 # If rsync is disabled or fails, try scp. 
561 try_scp = True 562 if self.use_rsync(): 563 logging.debug('Using Rsync.') 564 remote_dest = self._encode_remote_paths([dest]) 565 try: 566 rsync = self._make_rsync_cmd(local_sources, remote_dest, 567 delete_dest, preserve_symlinks, 568 False, excludes=excludes) 569 utils.run(rsync) 570 try_scp = False 571 except error.CmdError as e: 572 logging.warning("trying scp, rsync failed: %s", e) 573 574 if try_scp: 575 logging.debug('Trying scp.') 576 if excludes: 577 raise error.AutotestHostRunError( 578 '--exclude is not supported in scp, try to use rsync. ' 579 'excludes: %s' % ','.join(excludes), None) 580 # scp has no equivalent to --delete, just drop the entire dest dir 581 if delete_dest: 582 is_dir = self.run("ls -d %s/" % dest, 583 ignore_status=True).exit_status == 0 584 if is_dir: 585 cmd = "rm -rf %s && mkdir %s" 586 cmd %= (dest, dest) 587 self.run(cmd) 588 589 remote_dest = self._encode_remote_paths([dest], use_scp=True) 590 local_sources = self._make_rsync_compatible_source(source, True) 591 if local_sources: 592 sources = self._encode_local_paths(local_sources, escape=False) 593 scp = self._make_scp_cmd(sources, remote_dest) 594 try: 595 utils.run(scp) 596 except error.CmdError as e: 597 logging.debug('scp failed: %s', e) 598 raise error.AutoservRunError(e.args[0], e.args[1]) 599 else: 600 logging.debug('skipping scp for empty source list') 601 602 def verify_ssh_user_access(self): 603 """Verify ssh access to this host. 604 605 @returns False if ssh_ping fails due to Permissions error, True 606 otherwise. 607 """ 608 try: 609 self.ssh_ping() 610 except (error.AutoservSshPermissionDeniedError, 611 error.AutoservSshPingHostError): 612 return False 613 return True 614 615 616 def ssh_ping(self, timeout=60, connect_timeout=None, base_cmd='true'): 617 """ 618 Pings remote host via ssh. 619 620 @param timeout: Command execution timeout in seconds. 621 Defaults to 60 seconds. 622 @param connect_timeout: ssh connection timeout in seconds. 
    def ssh_ping(self, timeout=60, connect_timeout=None, base_cmd='true'):
        """
        Pings remote host via ssh.

        @param timeout: Command execution timeout in seconds.
                        Defaults to 60 seconds.
        @param connect_timeout: ssh connection timeout in seconds.
        @param base_cmd: The base command to run with the ssh ping.
                         Defaults to true.
        @raise AutoservSSHTimeout: If the ssh ping times out.
        @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
            permissions.
        @raise AutoservSshPingHostError: For other AutoservRunErrors.
        """
        # The connection timeout must never exceed the overall command timeout.
        ctimeout = min(timeout, connect_timeout or timeout)
        try:
            self.run(base_cmd, timeout=timeout, connect_timeout=ctimeout,
                     ssh_failure_retry_ok=True)
        except error.AutoservSSHTimeout:
            msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
            raise error.AutoservSSHTimeout(msg)
        except error.AutoservSshPermissionDeniedError:
            #let AutoservSshPermissionDeniedError be visible to the callers
            raise
        except error.AutoservRunError as e:
            # convert the generic AutoservRunError into something more
            # specific for this context
            raise error.AutoservSshPingHostError(e.description + '\n' +
                                                 repr(e.result_obj))


    def is_up(self, timeout=60, connect_timeout=None, base_cmd='true'):
        """
        Check if the remote host is up by ssh-ing and running a base command.

        @param timeout: command execution timeout in seconds.
        @param connect_timeout: ssh connection timeout in seconds.
        @param base_cmd: a base command to run with ssh. The default is 'true'.
        @returns True if the remote host is up before the timeout expires,
                 False otherwise.
        """
        try:
            self.ssh_ping(timeout=timeout,
                          connect_timeout=connect_timeout,
                          base_cmd=base_cmd)
        except error.AutoservError:
            return False
        else:
            return True


    def is_up_fast(self, count=1):
        """Return True if the host can be pinged.

        @param count How many time try to ping before decide that host is not
                     reachable by ping.
        """
        ping_config = ping_runner.PingConfig(self.hostname,
                                             count=count,
                                             ignore_result=True,
                                             ignore_status=True)
        return ping_runner.PingRunner().ping(ping_config).received > 0


    def wait_up(self, timeout=_DEFAULT_WAIT_UP_TIME_SECONDS):
        """
        Wait until the remote host is up or the timeout expires.

        In fact, it will wait until an ssh connection to the remote
        host can be established, and getty is running.

        @param timeout time limit in seconds before returning even
            if the host is not up.

        @returns True if the host was found to be up before the timeout expires,
                 False otherwise
        """
        current_time = int(time.time())
        end_time = current_time + timeout

        autoserv_error_logged = False
        while current_time < end_time:
            # Cap each ssh ping so a hung connection cannot consume the
            # whole remaining budget.
            ping_timeout = min(_DEFAULT_MAX_PING_TIMEOUT,
                               end_time - current_time)
            if self.is_up(timeout=ping_timeout, connect_timeout=ping_timeout):
                try:
                    if self.are_wait_up_processes_up():
                        logging.debug('Host %s is now up', self.host_port)
                        return True
                except error.AutoservError as e:
                    # Log only the first failure to avoid log spam while
                    # polling.
                    if not autoserv_error_logged:
                        logging.debug('Ignoring failure to reach %s: %s %s',
                                      self.host_port, e,
                                      '(and further similar failures)')
                        autoserv_error_logged = True
            time.sleep(1)
            current_time = int(time.time())

        logging.debug('Host %s is still down after waiting %d seconds',
                      self.host_port, int(timeout + time.time() - end_time))
        return False


    def wait_down(self, timeout=_DEFAULT_WAIT_DOWN_TIME_SECONDS,
                  warning_timer=None, old_boot_id=None,
                  max_ping_timeout=_DEFAULT_MAX_PING_TIMEOUT):
        """
        Wait until the remote host is down or the timeout expires.

        If old_boot_id is provided, waits until either the machine is
        unpingable or self.get_boot_id() returns a value different from
        old_boot_id. If the boot_id value has changed then the function
        returns True under the assumption that the machine has shut down
        and has now already come back up.

        If old_boot_id is None then until the machine becomes unreachable the
        method assumes the machine has not yet shut down.

        @param timeout Time limit in seconds before returning even if the host
            is still up.
        @param warning_timer Time limit in seconds that will generate a warning
            if the host is not down yet. Can be None for no warning.
        @param old_boot_id A string containing the result of self.get_boot_id()
            prior to the host being told to shut down. Can be None if this is
            not available.
        @param max_ping_timeout Maximum timeout in seconds for each
            self.get_boot_id() call. If this timeout is hit, it is assumed that
            the host went down and became unreachable.

        @returns True if the host was found to be down (max_ping_timeout timeout
            expired or boot_id changed if provided) and False if timeout
            expired.
        """
        #TODO: there is currently no way to distinguish between knowing
        #TODO: boot_id was unsupported and not knowing the boot_id.
        current_time = int(time.time())
        end_time = current_time + timeout

        if warning_timer:
            warn_time = current_time + warning_timer

        if old_boot_id is not None:
            logging.debug('Host %s pre-shutdown boot_id is %s',
                          self.host_port, old_boot_id)

        # Impose semi real-time deadline constraints, since some clients
        # (eg: watchdog timer tests) expect strict checking of time elapsed.
        # Each iteration of this loop is treated as though it atomically
        # completes within current_time, this is needed because if we used
        # inline time.time() calls instead then the following could happen:
        #
        # while time.time() < end_time:                     [23 < 30]
        #     some code.                                    [takes 10 secs]
        #     try:
        #         new_boot_id = self.get_boot_id(timeout=end_time - time.time())
        #                                                   [30 - 33]
        # The last step will lead to a return True, when in fact the machine
        # went down at 32 seconds (>30). Hence we need to pass get_boot_id
        # the same time that allowed us into that iteration of the loop.
        while current_time < end_time:
            ping_timeout = min(end_time - current_time, max_ping_timeout)
            try:
                new_boot_id = self.get_boot_id(timeout=ping_timeout)
            except error.AutoservError:
                # get_boot_id failed or timed out: treat the host as down.
                logging.debug('Host %s is now unreachable over ssh, is down',
                              self.host_port)
                return True
            else:
                # if the machine is up but the boot_id value has changed from
                # old boot id, then we can assume the machine has gone down
                # and then already come back up
                if old_boot_id is not None and old_boot_id != new_boot_id:
                    logging.debug('Host %s now has boot_id %s and so must '
                                  'have rebooted', self.host_port, new_boot_id)
                    return True

            if warning_timer and current_time > warn_time:
                self.record("INFO", None, "shutdown",
                            "Shutdown took longer than %ds" % warning_timer)
                # Print the warning only once.
                warning_timer = None
                # If a machine is stuck switching runlevels
                # This may cause the machine to reboot.
                self.run('kill -HUP 1', ignore_status=True)

            time.sleep(1)
            current_time = int(time.time())

        return False
    # tunable constants for the verify & repair code
    # Minimum free disk space (GB) required under the autotest install dir.
    AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER",
                                               "gb_diskspace_required",
                                               type=float,
                                               default=20.0)


    def verify_connectivity(self):
        """Verify the host answers over ssh and is not shutting down.

        @raises AutoservHostIsShuttingDownError: if the host reports it is
            in the process of shutting down.  Errors raised by ssh_ping()
            also propagate to the caller.
        """
        super(AbstractSSHHost, self).verify_connectivity()

        logging.info('Pinging host ' + self.host_port)
        self.ssh_ping()
        logging.info("Host (ssh) %s is alive", self.host_port)

        if self.is_shutting_down():
            raise error.AutoservHostIsShuttingDownError("Host is shutting down")


    def verify_software(self):
        """Verify the host has enough disk space for the autotest install.

        A disk-full condition is re-raised; other failures of the check
        (including a missing autotest dir) are logged and ignored as this
        is a best-effort check.
        """
        super(AbstractSSHHost, self).verify_software()
        try:
            self.check_diskspace(autotest.Autotest.get_install_dir(self),
                                 self.AUTOTEST_GB_DISKSPACE_REQUIRED)
        except error.AutoservDiskFullHostError:
            # only want to raise if it's a space issue
            raise
        except (error.AutoservHostError, autotest.AutodirNotFoundError):
            logging.exception('autodir space check exception, this is probably '
                             'safe to ignore\n')


    def close(self):
        """Release host resources: RPC servers, the master ssh connection
        (unless it is owned by a shared pool), and the temporary
        known_hosts file."""
        super(AbstractSSHHost, self).close()
        self.rpc_server_tracker.disconnect_all()
        # A pooled master connection is shared across scripts; only close
        # the connection if this host owns it.
        if not self._connection_pool:
            self._master_ssh.close()
        if os.path.exists(self.known_hosts_file):
            os.remove(self.known_hosts_file)


    def restart_master_ssh(self):
        """
        Stop and restart the ssh master connection.  This is meant as a last
        resort when ssh commands fail and we don't understand why.
        """
        logging.debug('Restarting master ssh connection')
        self._master_ssh.close()
        self._master_ssh.maybe_start(timeout=30)



    def start_master_ssh(self, timeout=DEFAULT_START_MASTER_SSH_TIMEOUT_S):
        """
        Called whenever a slave SSH connection needs to be initiated (e.g., by
        run, rsync, scp). If master SSH support is enabled and a master SSH
        connection is not active already, start a new one in the background.
        Also, cleanup any zombie master SSH connections (e.g., dead due to
        reboot).

        timeout: timeout in seconds (default 5) to wait for master ssh
                 connection to be established. If timeout is reached, a
                 warning message is logged, but no other action is taken.
        """
        # Respect the global config switch; when disabled, every command
        # opens its own ssh connection.
        if not enable_master_ssh:
            return
        self._master_ssh.maybe_start(timeout=timeout)


    def clear_known_hosts(self):
        """Clears out the temporary ssh known_hosts file.

        This is useful if the test SSHes to the machine, then reinstalls it,
        then SSHes to it again.  It can be called after the reinstall to
        reduce the spam in the logs.
        """
        logging.info("Clearing known hosts for host '%s', file '%s'.",
                     self.host_port, self.known_hosts_file)
        # Clear out the file by opening it for writing and then closing.
        fh = open(self.known_hosts_file, "w")
        fh.close()
901 """ 902 if not self.check_cached_up_status(): 903 logging.warning('Host %s did not answer to ping, skip collecting ' 904 'logs.', self.host_port) 905 return 906 907 locally_created_dest = False 908 if (not os.path.exists(local_dest_dir) 909 or not os.path.isdir(local_dest_dir)): 910 try: 911 os.makedirs(local_dest_dir) 912 locally_created_dest = True 913 except OSError as e: 914 logging.warning('Unable to collect logs from host ' 915 '%s: %s', self.host_port, e) 916 if not ignore_errors: 917 raise 918 return 919 920 # Build test result directory summary 921 try: 922 result_tools_runner.run_on_client(self, remote_src_dir) 923 except (error.AutotestRunError, error.AutoservRunError, 924 error.AutoservSSHTimeout) as e: 925 logging.exception( 926 'Non-critical failure: Failed to collect and throttle ' 927 'results at %s from host %s', remote_src_dir, 928 self.host_port) 929 930 try: 931 self.get_file(remote_src_dir, local_dest_dir, safe_symlinks=True) 932 except (error.AutotestRunError, error.AutoservRunError, 933 error.AutoservSSHTimeout) as e: 934 logging.warning('Collection of %s to local dir %s from host %s ' 935 'failed: %s', remote_src_dir, local_dest_dir, 936 self.host_port, e) 937 if locally_created_dest: 938 shutil.rmtree(local_dest_dir, ignore_errors=ignore_errors) 939 if not ignore_errors: 940 raise 941 942 # Clean up directory summary file on the client side. 943 try: 944 result_tools_runner.run_on_client(self, remote_src_dir, 945 cleanup_only=True) 946 except (error.AutotestRunError, error.AutoservRunError, 947 error.AutoservSSHTimeout) as e: 948 logging.exception( 949 'Non-critical failure: Failed to cleanup result summary ' 950 'files at %s in host %s', remote_src_dir, self.hostname) 951 952 953 def create_ssh_tunnel(self, port, local_port): 954 """Create an ssh tunnel from local_port to port. 955 956 This is used to forward a port securely through a tunnel process from 957 the server to the DUT for RPC server connection. 
958 959 @param port: remote port on the host. 960 @param local_port: local forwarding port. 961 962 @return: the tunnel process. 963 """ 964 tunnel_options = '-n -N -q -L %d:localhost:%d' % (local_port, port) 965 ssh_cmd = self.make_ssh_command(opts=tunnel_options, port=self.port) 966 tunnel_cmd = '%s %s' % (ssh_cmd, self.hostname) 967 logging.debug('Full tunnel command: %s', tunnel_cmd) 968 # Exec the ssh process directly here rather than using a shell. 969 # Using a shell leaves a dangling ssh process, because we deliver 970 # signals to the shell wrapping ssh, not the ssh process itself. 971 args = shlex.split(tunnel_cmd) 972 with open('/dev/null', 'w') as devnull: 973 tunnel_proc = subprocess.Popen(args, stdout=devnull, stderr=devnull, 974 close_fds=True) 975 logging.debug('Started ssh tunnel, local = %d' 976 ' remote = %d, pid = %d', 977 local_port, port, tunnel_proc.pid) 978 return tunnel_proc 979 980 981 def disconnect_ssh_tunnel(self, tunnel_proc): 982 """ 983 Disconnects a previously forwarded port from the server to the DUT for 984 RPC server connection. 985 986 @param tunnel_proc: a tunnel process returned from |create_ssh_tunnel|. 987 """ 988 if tunnel_proc.poll() is None: 989 tunnel_proc.terminate() 990 logging.debug('Terminated tunnel, pid %d', tunnel_proc.pid) 991 else: 992 logging.debug('Tunnel pid %d terminated early, status %d', 993 tunnel_proc.pid, tunnel_proc.returncode) 994 995 996 def get_os_type(self): 997 """Returns the host OS descriptor (to be implemented in subclasses). 998 999 @return A string describing the OS type. 1000 """ 1001 raise NotImplementedError 1002 1003 1004 def check_cached_up_status( 1005 self, expiration_seconds=_DEFAULT_UP_STATUS_EXPIRATION_SECONDS): 1006 """Check if the DUT responded to ping in the past `expiration_seconds`. 1007 1008 @param expiration_seconds: The number of seconds to keep the cached 1009 status of whether the DUT responded to ping. 
1010 @return: True if the DUT has responded to ping during the past 1011 `expiration_seconds`. 1012 """ 1013 # Refresh the up status if any of following conditions is true: 1014 # * cached status is never set 1015 # * cached status is False, so the method can check if the host is up 1016 # again. 1017 # * If the cached status is older than `expiration_seconds` 1018 expire_time = time.time() - expiration_seconds 1019 if (self._cached_up_status_updated is None or 1020 not self._cached_up_status or 1021 self._cached_up_status_updated < expire_time): 1022 self._cached_up_status = self.is_up_fast() 1023 self._cached_up_status_updated = time.time() 1024 return self._cached_up_status 1025 1026 1027 def _track_class_usage(self): 1028 """Tracking which class was used. 1029 1030 The idea to identify unused classes to be able clean them up. 1031 We skip names with dynamic created classes where the name is 1032 hostname of the device. 1033 """ 1034 class_name = None 1035 if 'chrome' not in self.__class__.__name__: 1036 class_name = self.__class__.__name__ 1037 else: 1038 for base in self.__class__.__bases__: 1039 if 'chrome' not in base.__name__: 1040 class_name = base.__name__ 1041 break 1042 if class_name: 1043 data = {'host_class': class_name} 1044 metrics.Counter( 1045 'chromeos/autotest/used_hosts').increment(fields=data) 1046 1047 def is_file_exists(self, file_path): 1048 """Check whether a given file is exist on the host. 1049 """ 1050 result = self.run('test -f ' + file_path, 1051 timeout=30, 1052 ignore_status=True) 1053 return result.exit_status == 0 1054