1# Lint as: python2, python3 2# Copyright 2009 Google Inc. Released under the GPL v2 3 4""" 5This module defines the base classes for the Host hierarchy. 6 7Implementation details: 8You should import the "hosts" package instead of importing each type of host. 9 10 Host: a machine on which you can run programs 11""" 12 13from __future__ import absolute_import 14from __future__ import division 15from __future__ import print_function 16 17 18__author__ = """ 19mbligh@google.com (Martin J. Bligh), 20poirier@google.com (Benjamin Poirier), 21stutsman@google.com (Ryan Stutsman) 22""" 23 24import json, logging, os, re, time 25 26from autotest_lib.client.common_lib import global_config, error, utils 27from autotest_lib.client.common_lib.cros import path_utils 28import six 29 30 31class Host(object): 32 """ 33 This class represents a machine on which you can run programs. 34 35 It may be a local machine, the one autoserv is running on, a remote 36 machine or a virtual machine. 37 38 Implementation details: 39 This is an abstract class, leaf subclasses must implement the methods 40 listed here. You must not instantiate this class but should 41 instantiate one of those leaf subclasses. 42 43 When overriding methods that raise NotImplementedError, the leaf class 44 is fully responsible for the implementation and should not chain calls 45 to super. When overriding methods that are a NOP in Host, the subclass 46 should chain calls to super(). The criteria for fitting a new method into 47 one category or the other should be: 48 1. If two separate generic implementations could reasonably be 49 concatenated, then the abstract implementation should pass and 50 subclasses should chain calls to super. 51 2. If only one class could reasonably perform the stated function 52 (e.g. two separate run() implementations cannot both be executed) 53 then the method should raise NotImplementedError in Host, and 54 the implementor should NOT chain calls to super, to ensure that 55 only one implementation ever gets executed. 56 """ 57 58 job = None 59 DEFAULT_REBOOT_TIMEOUT = global_config.global_config.get_config_value( 60 "HOSTS", "default_reboot_timeout", type=int, default=1800) 61 WAIT_DOWN_REBOOT_TIMEOUT = global_config.global_config.get_config_value( 62 "HOSTS", "wait_down_reboot_timeout", type=int, default=840) 63 WAIT_DOWN_REBOOT_WARNING = global_config.global_config.get_config_value( 64 "HOSTS", "wait_down_reboot_warning", type=int, default=540) 65 HOURS_TO_WAIT_FOR_RECOVERY = global_config.global_config.get_config_value( 66 "HOSTS", "hours_to_wait_for_recovery", type=float, default=2.5) 67 # the number of hardware repair requests that need to happen before we 68 # actually send machines to hardware repair 69 HARDWARE_REPAIR_REQUEST_THRESHOLD = 4 70 OP_REBOOT = 'reboot' 71 OP_SUSPEND = 'suspend' 72 PWR_OPERATION = [OP_REBOOT, OP_SUSPEND] 73 74 75 def __init__(self, *args, **dargs): 76 self._initialize(*args, **dargs) 77 78 79 def _initialize(self, *args, **dargs): 80 pass 81 82 83 @property 84 def job_repo_url_attribute(self): 85 """Get the host attribute name for job_repo_url. 86 """ 87 return 'job_repo_url' 88 89 90 def close(self): 91 """Close the connection to the host. 92 """ 93 pass 94 95 96 def setup(self): 97 """Setup the host object. 98 """ 99 pass 100 101 102 def run(self, command, timeout=3600, ignore_status=False, 103 stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS, 104 stdin=None, args=()): 105 """ 106 Run a command on this host. 107 108 @param command: the command line string 109 @param timeout: time limit in seconds before attempting to 110 kill the running process. The run() function 111 will take a few seconds longer than 'timeout' 112 to complete if it has to kill the process. 113 @param ignore_status: do not raise an exception, no matter 114 what the exit code of the command is. 115 @param stdout_tee: where to tee the stdout 116 @param stderr_tee: where to tee the stderr 117 @param stdin: stdin to pass (a string) to the executed command 118 @param args: sequence of strings to pass as arguments to command by 119 quoting them in " and escaping their contents if necessary 120 121 @return a utils.CmdResult object 122 123 @raises AutotestHostRunError: the exit code of the command execution 124 was not 0 and ignore_status was not enabled 125 """ 126 raise NotImplementedError('Run not implemented!') 127 128 129 def run_output(self, command, *args, **dargs): 130 """Run and retrieve the value of stdout stripped of whitespace. 131 132 @param command: Command to execute. 133 @param *args: Extra arguments to run. 134 @param **dargs: Extra keyword arguments to run. 135 136 @return: String value of stdout. 137 """ 138 return self.run(command, *args, **dargs).stdout.rstrip() 139 140 141 def reboot(self): 142 """Reboot the host. 143 """ 144 raise NotImplementedError('Reboot not implemented!') 145 146 147 def suspend(self): 148 """Suspend the host. 149 """ 150 raise NotImplementedError('Suspend not implemented!') 151 152 153 def sysrq_reboot(self): 154 """Execute host reboot via SysRq key. 155 """ 156 raise NotImplementedError('Sysrq reboot not implemented!') 157 158 159 def reboot_setup(self, *args, **dargs): 160 """Prepare for reboot. 161 162 This doesn't appear to be implemented by any current hosts. 163 164 @param *args: Extra arguments to ?. 165 @param **dargs: Extra keyword arguments to ?. 166 """ 167 pass 168 169 170 def reboot_followup(self, *args, **dargs): 171 """Post reboot work. 172 173 This doesn't appear to be implemented by any current hosts. 174 175 @param *args: Extra arguments to ?. 176 @param **dargs: Extra keyword arguments to ?. 177 """ 178 pass 179 180 181 def get_file(self, source, dest, delete_dest=False): 182 """Retrieve a file from the host. 183 184 @param source: Remote file path (directory, file or list). 185 @param dest: Local file path (directory, file or list). 186 @param delete_dest: Delete files in remote path that are not in local 187 path. 188 """ 189 raise NotImplementedError('Get file not implemented!') 190 191 192 def send_file(self, source, dest, delete_dest=False, excludes=None): 193 """Send a file to the host. 194 195 @param source: Local file path (directory, file or list). 196 @param dest: Remote file path (directory, file or list). 197 @param delete_dest: Delete files in remote path that are not in local 198 path. 199 @param excludes: A list of file pattern that matches files not to be 200 sent. `send_file` will fail if exclude is not 201 supported. 202 """ 203 raise NotImplementedError('Send file not implemented!') 204 205 206 def get_tmp_dir(self): 207 """Create a temporary directory on the host. 208 """ 209 raise NotImplementedError('Get temp dir not implemented!') 210 211 212 def is_up(self): 213 """Confirm the host is online. 214 """ 215 raise NotImplementedError('Is up not implemented!') 216 217 218 def is_shutting_down(self): 219 """ Indicates is a machine is currently shutting down. """ 220 return False 221 222 223 def get_wait_up_processes(self): 224 """ Gets the list of local processes to wait for in wait_up. """ 225 get_config = global_config.global_config.get_config_value 226 proc_list = get_config("HOSTS", "wait_up_processes", 227 default="").strip() 228 processes = set(p.strip() for p in proc_list.split(",")) 229 processes.discard("") 230 return processes 231 232 233 def get_boot_id(self, timeout=60): 234 """ Get a unique ID associated with the current boot. 235 236 Should return a string with the semantics such that two separate 237 calls to Host.get_boot_id() return the same string if the host did 238 not reboot between the two calls, and two different strings if it 239 has rebooted at least once between the two calls. 240 241 @param timeout The number of seconds to wait before timing out. 242 243 @return A string unique to this boot or None if not available.""" 244 BOOT_ID_FILE = '/proc/sys/kernel/random/boot_id' 245 NO_ID_MSG = 'no boot_id available' 246 cmd = 'if [ -f %r ]; then cat %r; else echo %r; fi' % ( 247 BOOT_ID_FILE, BOOT_ID_FILE, NO_ID_MSG) 248 boot_id = self.run(cmd, timeout=timeout).stdout.strip() 249 if boot_id == NO_ID_MSG: 250 return None 251 return boot_id 252 253 254 def wait_up(self, timeout=None): 255 """Wait for the host to come up. 256 257 @param timeout: Max seconds to wait. 258 """ 259 raise NotImplementedError('Wait up not implemented!') 260 261 262 def wait_down(self, timeout=None, warning_timer=None, old_boot_id=None): 263 """Wait for the host to go down. 264 265 @param timeout: Max seconds to wait before returning. 266 @param warning_timer: Seconds before warning host is not down. 267 @param old_boot_id: Result of self.get_boot_id() before shutdown. 268 """ 269 raise NotImplementedError('Wait down not implemented!') 270 271 272 def _construct_host_metadata(self, type_str): 273 """Returns dict of metadata with type_str, hostname, time_recorded. 274 275 @param type_str: String representing _type field in es db. 276 For example: type_str='reboot_total'. 277 """ 278 metadata = { 279 'hostname': self.hostname, 280 'time_recorded': time.time(), 281 '_type': type_str, 282 } 283 return metadata 284 285 286 def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, 287 down_timeout=WAIT_DOWN_REBOOT_TIMEOUT, 288 down_warning=WAIT_DOWN_REBOOT_WARNING, 289 log_failure=True, old_boot_id=None, **dargs): 290 """Wait for the host to come back from a reboot. 291 292 This is a generic implementation based entirely on wait_up and 293 wait_down. 294 295 @param timeout: Max seconds to wait for reboot to start. 296 @param down_timeout: Max seconds to wait for host to go down. 297 @param down_warning: Seconds to wait before warning host hasn't gone 298 down. 299 @param log_failure: bool(Log when host does not go down.) 300 @param old_boot_id: Result of self.get_boot_id() before restart. 301 @param **dargs: Extra arguments to reboot_followup. 302 303 @raises AutoservRebootError if host does not come back up. 304 """ 305 if not self.wait_down(timeout=down_timeout, 306 warning_timer=down_warning, 307 old_boot_id=old_boot_id): 308 if log_failure: 309 self.record("ABORT", None, "reboot.verify", "shut down failed") 310 raise error.AutoservShutdownError("Host did not shut down") 311 if self.wait_up(timeout): 312 self.record("GOOD", None, "reboot.verify") 313 self.reboot_followup(**dargs) 314 else: 315 self.record("ABORT", None, "reboot.verify", 316 "Host did not return from reboot") 317 raise error.AutoservRebootError("Host did not return from reboot") 318 319 320 def verify(self): 321 """Check if host is in good state. 322 """ 323 self.verify_hardware() 324 self.verify_connectivity() 325 self.verify_software() 326 327 328 def verify_hardware(self): 329 """Check host hardware. 330 """ 331 pass 332 333 334 def verify_connectivity(self): 335 """Check host network connectivity. 336 """ 337 pass 338 339 340 def verify_software(self): 341 """Check host software. 342 """ 343 pass 344 345 346 def check_diskspace(self, path, gb): 347 """Raises an error if path does not have at least gb GB free. 348 349 @param path The path to check for free disk space. 350 @param gb A floating point number to compare with a granularity 351 of 1 MB. 352 353 1000 based SI units are used. 354 355 @raises AutoservDiskFullHostError if path has less than gb GB free. 356 @raises AutoservDirectoryNotFoundError if path is not a valid directory. 357 @raises AutoservDiskSizeUnknownError the return from du is not parsed 358 correctly. 359 """ 360 one_mb = 10 ** 6 # Bytes (SI unit). 361 mb_per_gb = 1000.0 362 logging.info('Checking for >= %s GB of space under %s on machine %s', 363 gb, path, self.hostname) 364 365 if not self.path_exists(path): 366 msg = 'Path does not exist on host: %s' % path 367 logging.warning(msg) 368 raise error.AutoservDirectoryNotFoundError(msg) 369 370 cmd = 'df -PB %d %s | tail -1' % (one_mb, path) 371 df = self.run(cmd).stdout.split() 372 try: 373 free_space_gb = int(df[3]) / mb_per_gb 374 except (IndexError, ValueError): 375 msg = ('Could not determine the size of %s. ' 376 'Output from df: %s') % (path, df) 377 logging.error(msg) 378 raise error.AutoservDiskSizeUnknownError(msg) 379 380 if free_space_gb < gb: 381 raise error.AutoservDiskFullHostError(path, gb, free_space_gb) 382 else: 383 logging.info('Found %s GB >= %s GB of space under %s on machine %s', 384 free_space_gb, gb, path, self.hostname) 385 386 387 def check_inodes(self, path, min_kilo_inodes): 388 """Raises an error if a file system is short on i-nodes. 389 390 @param path The path to check for free i-nodes. 391 @param min_kilo_inodes Minimum number of i-nodes required, 392 in units of 1000 i-nodes. 393 394 @raises AutoservNoFreeInodesError If the minimum required 395 i-node count isn't available. 396 """ 397 min_inodes = 1000 * min_kilo_inodes 398 logging.info('Checking for >= %d i-nodes under %s ' 399 'on machine %s', min_inodes, path, self.hostname) 400 df = self.run('df -Pi %s | tail -1' % path).stdout.split() 401 free_inodes = int(df[3]) 402 if free_inodes < min_inodes: 403 raise error.AutoservNoFreeInodesError(path, min_inodes, 404 free_inodes) 405 else: 406 logging.info('Found %d >= %d i-nodes under %s on ' 407 'machine %s', free_inodes, min_inodes, 408 path, self.hostname) 409 410 411 def erase_dir_contents(self, path, ignore_status=True, timeout=3600): 412 """Empty a given directory path contents. 413 414 @param path: Path to empty. 415 @param ignore_status: Ignore the exit status from run. 416 @param timeout: Max seconds to allow command to complete. 417 """ 418 rm_cmd = 'find "%s" -mindepth 1 -maxdepth 1 -print0 | xargs -0 rm -rf' 419 self.run(rm_cmd % path, ignore_status=ignore_status, timeout=timeout) 420 421 422 def repair(self): 423 """Try and get the host to pass `self.verify()`.""" 424 self.verify() 425 426 427 def disable_ipfilters(self): 428 """Allow all network packets in and out of the host.""" 429 self.run('iptables-save > /tmp/iptable-rules') 430 self.run('iptables -P INPUT ACCEPT') 431 self.run('iptables -P FORWARD ACCEPT') 432 self.run('iptables -P OUTPUT ACCEPT') 433 434 435 def enable_ipfilters(self): 436 """Re-enable the IP filters disabled from disable_ipfilters()""" 437 if self.path_exists('/tmp/iptable-rules'): 438 self.run('iptables-restore < /tmp/iptable-rules') 439 440 441 def cleanup(self): 442 """Restore host to clean state. 443 """ 444 pass 445 446 447 def install(self, installableObject): 448 """Call install on a thing. 449 450 @param installableObject: Thing with install method that will accept our 451 self. 452 """ 453 installableObject.install(self) 454 455 456 def get_autodir(self): 457 raise NotImplementedError('Get autodir not implemented!') 458 459 460 def set_autodir(self): 461 raise NotImplementedError('Set autodir not implemented!') 462 463 464 def start_loggers(self): 465 """ Called to start continuous host logging. """ 466 pass 467 468 469 def stop_loggers(self): 470 """ Called to stop continuous host logging. """ 471 pass 472 473 474 # some extra methods simplify the retrieval of information about the 475 # Host machine, with generic implementations based on run(). subclasses 476 # should feel free to override these if they can provide better 477 # implementations for their specific Host types 478 479 def get_num_cpu(self): 480 """ Get the number of CPUs in the host according to /proc/cpuinfo. """ 481 proc_cpuinfo = self.run('cat /proc/cpuinfo', 482 stdout_tee=open(os.devnull, 'w')).stdout 483 cpus = 0 484 for line in proc_cpuinfo.splitlines(): 485 if line.startswith('processor'): 486 cpus += 1 487 return cpus 488 489 490 def get_arch(self): 491 """ Get the hardware architecture of the remote machine. """ 492 cmd_uname = path_utils.must_be_installed('/bin/uname', host=self) 493 arch = self.run('%s -m' % cmd_uname).stdout.rstrip() 494 if re.match(r'i\d86$', arch): 495 arch = 'i386' 496 return arch 497 498 499 def get_kernel_ver(self): 500 """ Get the kernel version of the remote machine. """ 501 cmd_uname = path_utils.must_be_installed('/bin/uname', host=self) 502 return self.run('%s -r' % cmd_uname).stdout.rstrip() 503 504 505 def get_cmdline(self): 506 """ Get the kernel command line of the remote machine. """ 507 return self.run('cat /proc/cmdline').stdout.rstrip() 508 509 510 def get_meminfo(self): 511 """ Get the kernel memory info (/proc/meminfo) of the remote machine 512 and return a dictionary mapping the various statistics. """ 513 meminfo_dict = {} 514 meminfo = self.run('cat /proc/meminfo').stdout.splitlines() 515 for key, val in (line.split(':', 1) for line in meminfo): 516 meminfo_dict[key.strip()] = val.strip() 517 return meminfo_dict 518 519 520 def path_exists(self, path): 521 """Determine if path exists on the remote machine. 522 523 @param path: path to check 524 525 @return: bool(path exists)""" 526 result = self.run('test -e "%s"' % utils.sh_escape(path), 527 ignore_status=True) 528 return result.exit_status == 0 529 530 531 # some extra helpers for doing job-related operations 532 533 def record(self, *args, **dargs): 534 """ Helper method for recording status logs against Host.job that 535 silently becomes a NOP if Host.job is not available. The args and 536 dargs are passed on to Host.job.record unchanged. """ 537 if self.job: 538 self.job.record(*args, **dargs) 539 540 541 def log_kernel(self): 542 """ Helper method for logging kernel information into the status logs. 543 Intended for cases where the "current" kernel is not really defined 544 and we want to explicitly log it. Does nothing if this host isn't 545 actually associated with a job. """ 546 if self.job: 547 kernel = self.get_kernel_ver() 548 self.job.record("INFO", None, None, 549 optional_fields={"kernel": kernel}) 550 551 552 def log_op(self, op, op_func): 553 """ Decorator for wrapping a management operaiton in a group for status 554 logging purposes. 555 556 @param op: name of the operation. 557 @param op_func: a function that carries out the operation 558 (reboot, suspend) 559 """ 560 if self.job and not hasattr(self, "RUNNING_LOG_OP"): 561 self.RUNNING_LOG_OP = True 562 try: 563 self.job.run_op(op, op_func, self.get_kernel_ver) 564 finally: 565 del self.RUNNING_LOG_OP 566 else: 567 op_func() 568 569 570 def list_files_glob(self, glob): 571 """Get a list of files on a remote host given a glob pattern path. 572 573 @param glob: pattern 574 575 @return: list of files 576 """ 577 SCRIPT = ("python -c 'import json, glob, sys;" 578 "json.dump(glob.glob(sys.argv[1]), sys.stdout)'") 579 output = self.run(SCRIPT, args=(glob,), stdout_tee=None, 580 timeout=60).stdout 581 return json.loads(output) 582 583 584 def symlink_closure(self, paths): 585 """ 586 Given a sequence of path strings, return the set of all paths that 587 can be reached from the initial set by following symlinks. 588 589 @param paths: sequence of path strings. 590 @return: a sequence of path strings that are all the unique paths that 591 can be reached from the given ones after following symlinks. 592 """ 593 SCRIPT = ("python -c 'import json, os, sys\n" 594 "paths = json.load(sys.stdin)\n" 595 "closure = {}\n" 596 "while paths:\n" 597 " path = next(iter(paths))\n" 598 " del paths[path]\n" 599 " if not os.path.exists(path):\n" 600 " continue\n" 601 " closure[path] = None\n" 602 " if os.path.islink(path):\n" 603 " link_to = os.path.join(os.path.dirname(path),\n" 604 " os.readlink(path))\n" 605 " if link_to not in closure:\n" 606 " paths[link_to] = None\n" 607 "json.dump(closure.keys(), sys.stdout, 0)'") 608 input_data = json.dumps(dict((path, None) for path in paths), 0) 609 output = self.run(SCRIPT, stdout_tee=None, stdin=input_data, 610 timeout=60).stdout 611 return json.loads(output) 612 613 614 def cleanup_kernels(self, boot_dir='/boot'): 615 """ 616 Remove any kernel image and associated files (vmlinux, system.map, 617 modules) for any image found in the boot directory that is not 618 referenced by entries in the bootloader configuration. 619 620 @param boot_dir: boot directory path string, default '/boot' 621 """ 622 # find all the vmlinuz images referenced by the bootloader 623 vmlinuz_prefix = os.path.join(boot_dir, 'vmlinuz-') 624 boot_info = self.bootloader.get_entries() 625 used_kernver = [boot['kernel'][len(vmlinuz_prefix):] 626 for boot in six.itervalues(boot_info)] 627 628 # find all the unused vmlinuz images in /boot 629 all_vmlinuz = self.list_files_glob(vmlinuz_prefix + '*') 630 used_vmlinuz = self.symlink_closure(vmlinuz_prefix + kernver 631 for kernver in used_kernver) 632 unused_vmlinuz = set(all_vmlinuz) - set(used_vmlinuz) 633 634 # find all the unused vmlinux images in /boot 635 vmlinux_prefix = os.path.join(boot_dir, 'vmlinux-') 636 all_vmlinux = self.list_files_glob(vmlinux_prefix + '*') 637 used_vmlinux = self.symlink_closure(vmlinux_prefix + kernver 638 for kernver in used_kernver) 639 unused_vmlinux = set(all_vmlinux) - set(used_vmlinux) 640 641 # find all the unused System.map files in /boot 642 systemmap_prefix = os.path.join(boot_dir, 'System.map-') 643 all_system_map = self.list_files_glob(systemmap_prefix + '*') 644 used_system_map = self.symlink_closure( 645 systemmap_prefix + kernver for kernver in used_kernver) 646 unused_system_map = set(all_system_map) - set(used_system_map) 647 648 # find all the module directories associated with unused kernels 649 modules_prefix = '/lib/modules/' 650 all_moddirs = [dir for dir in self.list_files_glob(modules_prefix + '*') 651 if re.match(modules_prefix + r'\d+\.\d+\.\d+.*', dir)] 652 used_moddirs = self.symlink_closure(modules_prefix + kernver 653 for kernver in used_kernver) 654 unused_moddirs = set(all_moddirs) - set(used_moddirs) 655 656 # remove all the vmlinuz files we don't use 657 # TODO: if needed this should become package manager agnostic 658 for vmlinuz in unused_vmlinuz: 659 # try and get an rpm package name 660 rpm = self.run('rpm -qf', args=(vmlinuz,), 661 ignore_status=True, timeout=120) 662 if rpm.exit_status == 0: 663 packages = set(line.strip() for line in 664 rpm.stdout.splitlines()) 665 # if we found some package names, try to remove them 666 for package in packages: 667 self.run('rpm -e', args=(package,), 668 ignore_status=True, timeout=120) 669 # remove the image files anyway, even if rpm didn't 670 self.run('rm -f', args=(vmlinuz,), 671 ignore_status=True, timeout=120) 672 673 # remove all the vmlinux and System.map files left over 674 for f in (unused_vmlinux | unused_system_map): 675 self.run('rm -f', args=(f,), 676 ignore_status=True, timeout=120) 677 678 # remove all unused module directories 679 # the regex match should keep us safe from removing the wrong files 680 for moddir in unused_moddirs: 681 self.run('rm -fr', args=(moddir,), ignore_status=True) 682 683 684 def get_attributes_to_clear_before_provision(self): 685 """Get a list of attributes to be cleared before machine_install starts. 686 687 If provision runs in a lab environment, it is necessary to clear certain 688 host attributes for the host in afe_host_attributes table. For example, 689 `job_repo_url` is a devserver url pointed to autotest packages for 690 CrosHost, it needs to be removed before provision starts for tests to 691 run reliably. 692 """ 693 return ['job_repo_url'] 694 695 696 def get_platform(self): 697 """Determine the correct platform label for this host. 698 699 @return: A string representing this host's platform. 700 """ 701 raise NotImplementedError("Get platform not implemented!") 702 703 704 def get_labels(self): 705 """Return a list of the labels gathered from the devices connected. 706 707 @return: A list of strings that denote the labels from all the devices 708 connected. 709 """ 710 raise NotImplementedError("Get labels not implemented!") 711 712 713 def check_cached_up_status(self, expiration_seconds): 714 """Check if the DUT responded to ping in the past `expiration_seconds`. 715 716 @param expiration_seconds: The number of seconds to keep the cached 717 status of whether the DUT responded to ping. 718 @return: True if the DUT has responded to ping during the past 719 `expiration_seconds`. 720 """ 721 raise NotImplementedError("check_cached_up_status not implemented!") 722