1# Copyright 2007 Google Inc. Released under the GPL v2 2#pylint: disable-msg=C0111 3 4import glob 5import logging 6import os 7import re 8import sys 9import tempfile 10import time 11import traceback 12 13import common 14from autotest_lib.client.bin.result_tools import runner as result_tools_runner 15from autotest_lib.client.common_lib import autotemp 16from autotest_lib.client.common_lib import base_job 17from autotest_lib.client.common_lib import error 18from autotest_lib.client.common_lib import packages 19from autotest_lib.client.common_lib import global_config 20from autotest_lib.client.common_lib import utils as client_utils 21from autotest_lib.server import installable_object 22from autotest_lib.server import prebuild 23from autotest_lib.server import utils 24 25try: 26 from chromite.lib import metrics 27except ImportError: 28 metrics = client_utils.metrics_mock 29 30 31AUTOTEST_SVN = 'svn://test.kernel.org/autotest/trunk/client' 32AUTOTEST_HTTP = 'http://test.kernel.org/svn/autotest/trunk/client' 33 34CONFIG = global_config.global_config 35AUTOSERV_PREBUILD = CONFIG.get_config_value( 36 'AUTOSERV', 'enable_server_prebuild', type=bool, default=False) 37 38ENABLE_RESULT_THROTTLING = CONFIG.get_config_value( 39 'AUTOSERV', 'enable_result_throttling', type=bool, default=False) 40 41class AutodirNotFoundError(Exception): 42 """No Autotest installation could be found.""" 43 44 45class AutotestFailure(Exception): 46 """Gereric exception class for failures during a test run.""" 47 48 49class AutotestAbort(AutotestFailure): 50 """ 51 AutotestAborts are thrown when the DUT seems fine, 52 and the test doesn't give us an explicit reason for 53 failure; In this case we have no choice but to abort. 54 """ 55 56 57class AutotestDeviceError(AutotestFailure): 58 """ 59 Exceptions that inherit from AutotestDeviceError 60 are thrown when we can determine the current 61 state of the DUT and conclude that it probably 62 lead to the test failing; these exceptions lead 63 to failures instead of aborts. 64 """ 65 66 67class AutotestDeviceNotPingable(AutotestDeviceError): 68 """Error for when a DUT becomes unpingable.""" 69 70 71class AutotestDeviceNotSSHable(AutotestDeviceError): 72 """Error for when a DUT is pingable but not SSHable.""" 73 74 75class AutotestDeviceRebooted(AutotestDeviceError): 76 """Error for when a DUT rebooted unexpectedly.""" 77 78 79class BaseAutotest(installable_object.InstallableObject): 80 """ 81 This class represents the Autotest program. 82 83 Autotest is used to run tests automatically and collect the results. 84 It also supports profilers. 85 86 Implementation details: 87 This is a leaf class in an abstract class hierarchy, it must 88 implement the unimplemented methods in parent classes. 89 """ 90 91 def __init__(self, host=None): 92 self.host = host 93 self.got = False 94 self.installed = False 95 self.serverdir = utils.get_server_dir() 96 super(BaseAutotest, self).__init__() 97 98 99 install_in_tmpdir = False 100 @classmethod 101 def set_install_in_tmpdir(cls, flag): 102 """ Sets a flag that controls whether or not Autotest should by 103 default be installed in a "standard" directory (e.g. 104 /home/autotest, /usr/local/autotest) or a temporary directory. """ 105 cls.install_in_tmpdir = flag 106 107 108 @classmethod 109 def get_client_autodir_paths(cls, host): 110 return global_config.global_config.get_config_value( 111 'AUTOSERV', 'client_autodir_paths', type=list) 112 113 114 @classmethod 115 def get_installed_autodir(cls, host): 116 """ 117 Find where the Autotest client is installed on the host. 118 @returns an absolute path to an installed Autotest client root. 119 @raises AutodirNotFoundError if no Autotest installation can be found. 120 """ 121 autodir = host.get_autodir() 122 if autodir: 123 logging.debug('Using existing host autodir: %s', autodir) 124 return autodir 125 126 for path in Autotest.get_client_autodir_paths(host): 127 try: 128 autotest_binary = os.path.join(path, 'bin', 'autotest') 129 host.run('test -x %s' % utils.sh_escape(autotest_binary)) 130 host.run('test -w %s' % utils.sh_escape(path)) 131 logging.debug('Found existing autodir at %s', path) 132 return path 133 except error.GenericHostRunError: 134 logging.debug('%s does not exist on %s', autotest_binary, 135 host.hostname) 136 raise AutodirNotFoundError 137 138 139 @classmethod 140 def get_install_dir(cls, host): 141 """ 142 Determines the location where autotest should be installed on 143 host. If self.install_in_tmpdir is set, it will return a unique 144 temporary directory that autotest can be installed in. Otherwise, looks 145 for an existing installation to use; if none is found, looks for a 146 usable directory in the global config client_autodir_paths. 147 """ 148 try: 149 install_dir = cls.get_installed_autodir(host) 150 except AutodirNotFoundError: 151 install_dir = cls._find_installable_dir(host) 152 153 if cls.install_in_tmpdir: 154 return host.get_tmp_dir(parent=install_dir) 155 return install_dir 156 157 158 @classmethod 159 def _find_installable_dir(cls, host): 160 client_autodir_paths = cls.get_client_autodir_paths(host) 161 for path in client_autodir_paths: 162 try: 163 host.run('mkdir -p %s' % utils.sh_escape(path)) 164 host.run('test -w %s' % utils.sh_escape(path)) 165 return path 166 except error.AutoservRunError: 167 logging.debug('Failed to create %s', path) 168 metrics.Counter( 169 'chromeos/autotest/errors/no_autotest_install_path').increment( 170 fields={'dut_host_name': host.hostname}) 171 raise error.AutoservInstallError( 172 'Unable to find a place to install Autotest; tried %s' % 173 ', '.join(client_autodir_paths)) 174 175 176 def get_fetch_location(self): 177 c = global_config.global_config 178 repos = c.get_config_value("PACKAGES", 'fetch_location', type=list, 179 default=[]) 180 repos.reverse() 181 return repos 182 183 184 def install(self, host=None, autodir=None, use_packaging=True): 185 self._install(host=host, autodir=autodir, use_packaging=use_packaging) 186 187 188 def install_full_client(self, host=None, autodir=None): 189 self._install(host=host, autodir=autodir, use_autoserv=False, 190 use_packaging=False) 191 192 193 def install_no_autoserv(self, host=None, autodir=None): 194 self._install(host=host, autodir=autodir, use_autoserv=False) 195 196 197 def _install_using_packaging(self, host, autodir): 198 repos = self.get_fetch_location() 199 if not repos: 200 raise error.PackageInstallError("No repos to install an " 201 "autotest client from") 202 pkgmgr = packages.PackageManager(autodir, hostname=host.hostname, 203 repo_urls=repos, 204 do_locking=False, 205 run_function=host.run, 206 run_function_dargs=dict(timeout=600)) 207 # The packages dir is used to store all the packages that 208 # are fetched on that client. (for the tests,deps etc. 209 # too apart from the client) 210 pkg_dir = os.path.join(autodir, 'packages') 211 # clean up the autodir except for the packages directory 212 host.run('cd %s && ls | grep -v "^packages$"' 213 ' | xargs rm -rf && rm -rf .[!.]*' % autodir) 214 pkgmgr.install_pkg('autotest', 'client', pkg_dir, autodir, 215 preserve_install_dir=True) 216 self.installed = True 217 218 219 def _install_using_send_file(self, host, autodir): 220 dirs_to_exclude = set(["tests", "site_tests", "deps", "profilers", 221 "packages"]) 222 light_files = [os.path.join(self.source_material, f) 223 for f in os.listdir(self.source_material) 224 if f not in dirs_to_exclude] 225 host.send_file(light_files, autodir, delete_dest=True) 226 227 # create empty dirs for all the stuff we excluded 228 commands = [] 229 for path in dirs_to_exclude: 230 abs_path = os.path.join(autodir, path) 231 abs_path = utils.sh_escape(abs_path) 232 commands.append("mkdir -p '%s'" % abs_path) 233 commands.append("touch '%s'/__init__.py" % abs_path) 234 host.run(';'.join(commands)) 235 236 237 def _install(self, host=None, autodir=None, use_autoserv=True, 238 use_packaging=True): 239 """ 240 Install autotest. If get() was not called previously, an 241 attempt will be made to install from the autotest svn 242 repository. 243 244 @param host A Host instance on which autotest will be installed 245 @param autodir Location on the remote host to install to 246 @param use_autoserv Enable install modes that depend on the client 247 running with the autoserv harness 248 @param use_packaging Enable install modes that use the packaging system 249 250 @exception AutoservError if a tarball was not specified and 251 the target host does not have svn installed in its path 252 """ 253 if not host: 254 host = self.host 255 if not self.got: 256 self.get() 257 host.wait_up(timeout=30) 258 host.setup() 259 logging.info("Installing autotest on %s", host.hostname) 260 261 # set up the autotest directory on the remote machine 262 if not autodir: 263 autodir = self.get_install_dir(host) 264 logging.info('Using installation dir %s', autodir) 265 host.set_autodir(autodir) 266 host.run('mkdir -p %s' % utils.sh_escape(autodir)) 267 268 # make sure there are no files in $AUTODIR/results 269 results_path = os.path.join(autodir, 'results') 270 host.run('rm -rf %s/*' % utils.sh_escape(results_path), 271 ignore_status=True) 272 273 # Fetch the autotest client from the nearest repository 274 if use_packaging: 275 try: 276 self._install_using_packaging(host, autodir) 277 logging.info("Installation of autotest completed using the " 278 "packaging system.") 279 return 280 except (error.PackageInstallError, error.AutoservRunError, 281 global_config.ConfigError), e: 282 logging.info("Could not install autotest using the packaging " 283 "system: %s. Trying other methods", e) 284 else: 285 # Delete the package checksum file to force dut updating local 286 # packages. 287 command = ('rm -f "%s"' % 288 (os.path.join(autodir, packages.CHECKSUM_FILE))) 289 host.run(command) 290 291 # try to install from file or directory 292 if self.source_material: 293 c = global_config.global_config 294 supports_autoserv_packaging = c.get_config_value( 295 "PACKAGES", "serve_packages_from_autoserv", type=bool) 296 # Copy autotest recursively 297 if supports_autoserv_packaging and use_autoserv: 298 self._install_using_send_file(host, autodir) 299 else: 300 host.send_file(self.source_material, autodir, delete_dest=True) 301 logging.info("Installation of autotest completed from %s", 302 self.source_material) 303 self.installed = True 304 return 305 306 # if that fails try to install using svn 307 if utils.run('which svn').exit_status: 308 raise error.AutoservError('svn not found on target machine: %s' % 309 host.hostname) 310 try: 311 host.run('svn checkout %s %s' % (AUTOTEST_SVN, autodir)) 312 except error.AutoservRunError, e: 313 host.run('svn checkout %s %s' % (AUTOTEST_HTTP, autodir)) 314 logging.info("Installation of autotest completed using SVN.") 315 self.installed = True 316 317 318 def uninstall(self, host=None): 319 """ 320 Uninstall (i.e. delete) autotest. Removes the autotest client install 321 from the specified host. 322 323 @params host a Host instance from which the client will be removed 324 """ 325 if not self.installed: 326 return 327 if not host: 328 host = self.host 329 autodir = host.get_autodir() 330 if not autodir: 331 return 332 333 # perform the actual uninstall 334 host.run("rm -rf %s" % utils.sh_escape(autodir), ignore_status=True) 335 host.set_autodir(None) 336 self.installed = False 337 338 339 def get(self, location=None): 340 if not location: 341 location = os.path.join(self.serverdir, '../client') 342 location = os.path.abspath(location) 343 # If there's stuff run on our client directory already, it 344 # can cause problems. Try giving it a quick clean first. 345 cwd = os.getcwd() 346 os.chdir(location) 347 try: 348 utils.system('tools/make_clean', ignore_status=True) 349 finally: 350 os.chdir(cwd) 351 super(BaseAutotest, self).get(location) 352 self.got = True 353 354 355 def run(self, control_file, results_dir='.', host=None, timeout=None, 356 tag=None, parallel_flag=False, client_disconnect_timeout=None, 357 use_packaging=True): 358 """ 359 Run an autotest job on the remote machine. 360 361 @param control_file: An open file-like-obj of the control file. 362 @param results_dir: A str path where the results should be stored 363 on the local filesystem. 364 @param host: A Host instance on which the control file should 365 be run. 366 @param timeout: Maximum number of seconds to wait for the run or None. 367 @param tag: Tag name for the client side instance of autotest. 368 @param parallel_flag: Flag set when multiple jobs are run at the 369 same time. 370 @param client_disconnect_timeout: Seconds to wait for the remote host 371 to come back after a reboot. Defaults to the host setting for 372 DEFAULT_REBOOT_TIMEOUT. 373 374 @raises AutotestRunError: If there is a problem executing 375 the control file. 376 """ 377 host = self._get_host_and_setup(host, use_packaging=use_packaging) 378 logging.debug('Autotest job starts on remote host: %s', 379 host.hostname) 380 results_dir = os.path.abspath(results_dir) 381 382 if client_disconnect_timeout is None: 383 client_disconnect_timeout = host.DEFAULT_REBOOT_TIMEOUT 384 385 if tag: 386 results_dir = os.path.join(results_dir, tag) 387 388 atrun = _Run(host, results_dir, tag, parallel_flag) 389 self._do_run(control_file, results_dir, host, atrun, timeout, 390 client_disconnect_timeout, use_packaging=use_packaging) 391 392 393 def _get_host_and_setup(self, host, use_packaging=True): 394 if not host: 395 host = self.host 396 if not self.installed: 397 self.install(host, use_packaging=use_packaging) 398 399 host.wait_up(timeout=30) 400 return host 401 402 403 def _do_run(self, control_file, results_dir, host, atrun, timeout, 404 client_disconnect_timeout, use_packaging=True): 405 try: 406 atrun.verify_machine() 407 except: 408 logging.error("Verify failed on %s. Reinstalling autotest", 409 host.hostname) 410 self.install(host) 411 atrun.verify_machine() 412 debug = os.path.join(results_dir, 'debug') 413 try: 414 os.makedirs(debug) 415 except Exception: 416 pass 417 418 delete_file_list = [atrun.remote_control_file, 419 atrun.remote_control_file + '.state', 420 atrun.manual_control_file, 421 atrun.manual_control_file + '.state'] 422 cmd = ';'.join('rm -f ' + control for control in delete_file_list) 423 host.run(cmd, ignore_status=True) 424 425 tmppath = utils.get(control_file, local_copy=True) 426 427 # build up the initialization prologue for the control file 428 prologue_lines = [] 429 430 # Add the additional user arguments 431 prologue_lines.append("args = %r\n" % self.job.args) 432 433 # If the packaging system is being used, add the repository list. 434 repos = None 435 try: 436 if use_packaging: 437 repos = self.get_fetch_location() 438 prologue_lines.append('job.add_repository(%s)\n' % repos) 439 else: 440 logging.debug('use_packaging is set to False, do not add any ' 441 'repository.') 442 except global_config.ConfigError, e: 443 # If repos is defined packaging is enabled so log the error 444 if repos: 445 logging.error(e) 446 447 # on full-size installs, turn on any profilers the server is using 448 running_profilers = host.job.profilers.add_log.iteritems() 449 for profiler, (args, dargs) in running_profilers: 450 call_args = [repr(profiler)] 451 call_args += [repr(arg) for arg in args] 452 call_args += ["%s=%r" % item for item in dargs.iteritems()] 453 prologue_lines.append("job.profilers.add(%s)\n" 454 % ", ".join(call_args)) 455 cfile = "".join(prologue_lines) 456 457 cfile += open(tmppath).read() 458 open(tmppath, "w").write(cfile) 459 460 # Create and copy state file to remote_control_file + '.state' 461 state_file = host.job.preprocess_client_state() 462 host.send_file(state_file, atrun.remote_control_file + '.init.state') 463 os.remove(state_file) 464 465 # Copy control_file to remote_control_file on the host 466 host.send_file(tmppath, atrun.remote_control_file) 467 if os.path.abspath(tmppath) != os.path.abspath(control_file): 468 os.remove(tmppath) 469 470 atrun.execute_control( 471 timeout=timeout, 472 client_disconnect_timeout=client_disconnect_timeout) 473 474 475 @classmethod 476 def _check_client_test_result(cls, host, test_name): 477 """ 478 Check result of client test. 479 Autotest will store results in the file name status. 480 We check that second to last line in that file begins with 'END GOOD' 481 482 @raises TestFail: If client test does not pass. 483 """ 484 client_result_dir = '%s/results/default' % host.autodir 485 command = 'tail -2 %s/status | head -1' % client_result_dir 486 status = host.run(command).stdout.strip() 487 logging.info(status) 488 if status[:8] != 'END GOOD': 489 raise error.TestFail('%s client test did not pass.' % test_name) 490 491 492 def run_timed_test(self, test_name, results_dir='.', host=None, 493 timeout=None, parallel_flag=False, 494 client_disconnect_timeout=None, *args, **dargs): 495 """ 496 Assemble a tiny little control file to just run one test, 497 and run it as an autotest client-side test 498 """ 499 if not host: 500 host = self.host 501 if not self.installed: 502 self.install(host) 503 opts = ["%s=%s" % (o[0], repr(o[1])) for o in dargs.items()] 504 cmd = ", ".join([repr(test_name)] + map(repr, args) + opts) 505 control = "job.run_test(%s)\n" % cmd 506 self.run(control, results_dir, host, timeout=timeout, 507 parallel_flag=parallel_flag, 508 client_disconnect_timeout=client_disconnect_timeout) 509 510 if dargs.get('check_client_result', False): 511 self._check_client_test_result(host, test_name) 512 513 514 def run_test(self, test_name, results_dir='.', host=None, 515 parallel_flag=False, 516 client_disconnect_timeout=None, *args, **dargs): 517 self.run_timed_test(test_name, results_dir, host, timeout=None, 518 parallel_flag=parallel_flag, 519 client_disconnect_timeout=client_disconnect_timeout, 520 *args, **dargs) 521 522 523class _BaseRun(object): 524 """ 525 Represents a run of autotest control file. This class maintains 526 all the state necessary as an autotest control file is executed. 527 528 It is not intended to be used directly, rather control files 529 should be run using the run method in Autotest. 530 """ 531 def __init__(self, host, results_dir, tag, parallel_flag): 532 self.host = host 533 self.results_dir = results_dir 534 self.env = host.env 535 self.tag = tag 536 self.parallel_flag = parallel_flag 537 self.autodir = Autotest.get_installed_autodir(self.host) 538 control = os.path.join(self.autodir, 'control') 539 if tag: 540 control += '.' + tag 541 self.manual_control_file = control 542 self.remote_control_file = control + '.autoserv' 543 self.config_file = os.path.join(self.autodir, 'global_config.ini') 544 545 546 def verify_machine(self): 547 binary = os.path.join(self.autodir, 'bin/autotest') 548 try: 549 self.host.run('ls %s > /dev/null 2>&1' % binary) 550 except: 551 raise error.AutoservInstallError( 552 "Autotest does not appear to be installed") 553 554 if not self.parallel_flag: 555 tmpdir = os.path.join(self.autodir, 'tmp') 556 download = os.path.join(self.autodir, 'tests/download') 557 self.host.run('umount %s' % tmpdir, ignore_status=True) 558 self.host.run('umount %s' % download, ignore_status=True) 559 560 561 def get_base_cmd_args(self): 562 args = ['--verbose'] 563 if self.tag: 564 args.append('-t %s' % self.tag) 565 if self.host.job.use_external_logging(): 566 args.append('-l') 567 if self.host.hostname: 568 args.append('--hostname=%s' % self.host.hostname) 569 args.append('--user=%s' % self.host.job.user) 570 571 args.append(self.remote_control_file) 572 return args 573 574 575 def get_daemon_cmd(self, monitor_dir): 576 cmd = ['nohup', os.path.join(self.autodir, 'bin/autotestd'), 577 monitor_dir, '-H autoserv'] 578 cmd += self.get_base_cmd_args() 579 cmd += ['>/dev/null', '2>/dev/null', '&'] 580 return ' '.join(cmd) 581 582 583 def get_monitor_cmd(self, monitor_dir, stdout_read, stderr_read): 584 cmd = [os.path.join(self.autodir, 'bin', 'autotestd_monitor'), 585 monitor_dir, str(stdout_read), str(stderr_read)] 586 return ' '.join(cmd) 587 588 589 def get_client_log(self): 590 """Find what the "next" client.* prefix should be 591 592 @returns A string of the form client.INTEGER that should be prefixed 593 to all client debug log files. 594 """ 595 max_digit = -1 596 debug_dir = os.path.join(self.results_dir, 'debug') 597 client_logs = glob.glob(os.path.join(debug_dir, 'client.*.*')) 598 for log in client_logs: 599 _, number, _ = log.split('.', 2) 600 if number.isdigit(): 601 max_digit = max(max_digit, int(number)) 602 return 'client.%d' % (max_digit + 1) 603 604 605 def copy_client_config_file(self, client_log_prefix=None): 606 """ 607 Create and copy the client config file based on the server config. 608 609 @param client_log_prefix: Optional prefix to prepend to log files. 610 """ 611 client_config_file = self._create_client_config_file(client_log_prefix) 612 self.host.send_file(client_config_file, self.config_file) 613 os.remove(client_config_file) 614 615 616 def _create_client_config_file(self, client_log_prefix=None): 617 """ 618 Create a temporary file with the [CLIENT] section configuration values 619 taken from the server global_config.ini. 620 621 @param client_log_prefix: Optional prefix to prepend to log files. 622 623 @return: Path of the temporary file generated. 624 """ 625 config = global_config.global_config.get_section_values('CLIENT') 626 if client_log_prefix: 627 config.set('CLIENT', 'default_logging_name', client_log_prefix) 628 return self._create_aux_file(config.write) 629 630 631 def _create_aux_file(self, func, *args): 632 """ 633 Creates a temporary file and writes content to it according to a 634 content creation function. The file object is appended to *args, which 635 is then passed to the content creation function 636 637 @param func: Function that will be used to write content to the 638 temporary file. 639 @param *args: List of parameters that func takes. 640 @return: Path to the temporary file that was created. 641 """ 642 fd, path = tempfile.mkstemp(dir=self.host.job.tmpdir) 643 aux_file = os.fdopen(fd, "w") 644 try: 645 list_args = list(args) 646 list_args.append(aux_file) 647 func(*list_args) 648 finally: 649 aux_file.close() 650 return path 651 652 653 @staticmethod 654 def is_client_job_finished(last_line): 655 return bool(re.match(r'^END .*\t----\t----\t.*$', last_line)) 656 657 658 @staticmethod 659 def is_client_job_rebooting(last_line): 660 return bool(re.match(r'^\t*GOOD\t----\treboot\.start.*$', last_line)) 661 662 663 def _diagnose_dut(self, old_boot_id=None): 664 """ 665 Run diagnostic checks on a DUT. 666 667 1. ping: A dead host will not respond to pings. 668 2. ssh (happens with 3.): DUT hangs usually fail in authentication 669 but respond to pings. 670 3. Check if a reboot occured: A healthy but unexpected reboot leaves the 671 host running with a new boot id. 672 673 This method will always raise an exception from the AutotestFailure 674 family and should only get called when the reason for a test failing 675 is ambiguous. 676 677 @raises AutotestDeviceNotPingable: If the DUT doesn't respond to ping. 678 @raises AutotestDeviceNotSSHable: If we cannot SSH into the DUT. 679 @raises AutotestDeviceRebooted: If the boot id changed. 680 @raises AutotestAbort: If none of the above exceptions were raised. 681 Since we have no recourse we must abort at this stage. 682 """ 683 msg = 'Autotest client terminated unexpectedly: ' 684 if utils.ping(self.host.hostname, tries=1, deadline=1) != 0: 685 msg += 'DUT is no longer pingable, it may have rebooted or hung.\n' 686 raise AutotestDeviceNotPingable(msg) 687 688 if old_boot_id: 689 try: 690 new_boot_id = self.host.get_boot_id(timeout=60) 691 except Exception as e: 692 msg += ('DUT is pingable but not SSHable, it most likely' 693 ' sporadically rebooted during testing. %s\n' % str(e)) 694 raise AutotestDeviceNotSSHable(msg) 695 else: 696 if new_boot_id != old_boot_id: 697 msg += 'DUT rebooted during the test run.\n' 698 raise AutotestDeviceRebooted(msg) 699 700 msg += ('DUT is pingable, SSHable and did NOT restart ' 701 'un-expectedly. We probably lost connectivity during the ' 702 'test.') 703 else: 704 msg += ('DUT is pingable, could not determine if an un-expected ' 705 'reboot occured during the test.') 706 707 raise AutotestAbort(msg) 708 709 710 def log_unexpected_abort(self, stderr_redirector, old_boot_id=None): 711 """ 712 Logs that something unexpected happened, then tries to diagnose the 713 failure. The purpose of this function is only to close out the status 714 log with the appropriate error message, not to critically terminate 715 the program. 716 717 @param stderr_redirector: log stream. 718 @param old_boot_id: boot id used to infer if a reboot occured. 719 """ 720 stderr_redirector.flush_all_buffers() 721 try: 722 self._diagnose_dut(old_boot_id) 723 except AutotestFailure as e: 724 self.host.job.record('END ABORT', None, None, str(e)) 725 726 727 @staticmethod 728 def _strip_stderr_prologue(stderr): 729 """Strips the 'standard' prologue that get pre-pended to every 730 remote command and returns the text that was actually written to 731 stderr by the remote command.""" 732 stderr_lines = stderr.split("\n")[1:] 733 if not stderr_lines: 734 return "" 735 elif stderr_lines[0].startswith("NOTE: autotestd_monitor"): 736 del stderr_lines[0] 737 return "\n".join(stderr_lines) 738 739 740 def _execute_daemon(self, timeout, stderr_redirector, 741 client_disconnect_timeout): 742 monitor_dir = self.host.get_tmp_dir() 743 daemon_cmd = self.get_daemon_cmd(monitor_dir) 744 745 # grab the location for the server-side client log file 746 client_log_prefix = self.get_client_log() 747 client_log_path = os.path.join(self.results_dir, 'debug', 748 client_log_prefix + '.log') 749 client_log = open(client_log_path, 'w', 0) 750 self.copy_client_config_file(client_log_prefix) 751 752 stdout_read = stderr_read = 0 753 self.host.job.push_execution_context(self.results_dir) 754 try: 755 self.host.run(daemon_cmd, ignore_status=True, timeout=timeout) 756 disconnect_warnings = [] 757 while True: 758 monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read, 759 stderr_read) 760 try: 761 result = self.host.run(monitor_cmd, ignore_status=True, 762 timeout=timeout, 763 stdout_tee=client_log, 764 stderr_tee=stderr_redirector) 765 except error.AutoservRunError, e: 766 result = e.result_obj 767 result.exit_status = None 768 disconnect_warnings.append(e.description) 769 770 stderr_redirector.log_warning( 771 "Autotest client was disconnected: %s" % e.description, 772 "NETWORK") 773 except error.AutoservSSHTimeout: 774 result = utils.CmdResult(monitor_cmd, "", "", None, 0) 775 stderr_redirector.log_warning( 776 "Attempt to connect to Autotest client timed out", 777 "NETWORK") 778 779 stdout_read += len(result.stdout) 780 stderr_read += len(self._strip_stderr_prologue(result.stderr)) 781 782 if result.exit_status is not None: 783 # TODO (crosbug.com/38224)- sbasi: Remove extra logging. 784 logging.debug('Result exit status is %d.', 785 result.exit_status) 786 return result 787 elif not self.host.wait_up(client_disconnect_timeout): 788 raise error.AutoservSSHTimeout( 789 "client was disconnected, reconnect timed out") 790 finally: 791 client_log.close() 792 self.host.job.pop_execution_context() 793 794 795 def _really_execute_control(self, timeout, stderr_redirector, 796 client_disconnect_timeout): 797 logging.info("Executing %s/bin/autotest %s/controt", 798 self.autodir, self.autodir) 799 800 result = self._execute_daemon(timeout, stderr_redirector, 801 client_disconnect_timeout) 802 803 last_line = stderr_redirector.last_line 804 805 # check if we failed hard enough to warrant an exception 806 if result.exit_status == 1: 807 err = error.AutotestRunError("client job was aborted") 808 elif not result.stderr: 809 err = error.AutotestRunError( 810 "_really_execute_control failed to return anything\n" 811 "stdout:%s\n" % result.stdout) 812 else: 813 err = None 814 815 # log something if the client failed AND never finished logging 816 if err and not self.is_client_job_finished(last_line): 817 self.log_unexpected_abort(stderr_redirector) 818 819 if err: 820 raise err 821 else: 822 return stderr_redirector.last_line 823 824 825 def execute_control(self, timeout=None, client_disconnect_timeout=None): 826 collector = log_collector(self.host, self.tag, self.results_dir) 827 hostname = self.host.hostname 828 remote_results = collector.client_results_dir 829 local_results = collector.server_results_dir 830 self.host.job.add_client_log(hostname, remote_results, 831 local_results) 832 job_record_context = self.host.job.get_record_context() 833 logger = client_logger(self.host, self.tag, self.results_dir) 834 835 try: 836 boot_id = self.host.get_boot_id() 837 last = self._really_execute_control(timeout, logger, 838 client_disconnect_timeout) 839 if self.is_client_job_finished(last): 840 logging.info("Client complete") 841 return 842 elif self.is_client_job_rebooting(last): 843 # TODO(crbug.com/684311) This feature is never used. Validate 844 # and drop this case. 845 m = 'chromeos/autotest/errors/client_test_triggered_reboot' 846 metrics.Counter(m).increment() 847 self.host.job.record("ABORT", None, "reboot", 848 'client triggered reboot is unsupported') 849 self.host.job.record("END ABORT", None, None, 850 'client triggered reboot is unsupported') 851 return 852 853 # If a test fails without probable cause we try to bucket it's 854 # failure into one of 2 categories. If we can determine the 855 # current state of the device and it is suspicious, we close the 856 # status lines indicating a failure. If we either cannot 857 # determine the state of the device, or it appears totally 858 # healthy, we give up and abort. 859 try: 860 self._diagnose_dut(boot_id) 861 except AutotestDeviceError as e: 862 # The status lines of the test are pretty much tailed to 863 # our log, with indentation, from the client job on the DUT. 864 # So if the DUT goes down unexpectedly we'll end up with a 865 # malformed status log unless we manually unwind the status 866 # stack. Ideally we would want to write a nice wrapper like 867 # server_job methods run_reboot, run_group but they expect 868 # reboots and we don't. 869 self.host.job.record('FAIL', None, None, str(e)) 870 self.host.job.record('END FAIL', None, None) 871 self.host.job.record('END GOOD', None, None) 872 self.host.job.failed_with_device_error = True 873 return 874 except AutotestAbort as e: 875 self.host.job.record('ABORT', None, None, str(e)) 876 self.host.job.record('END ABORT', None, None) 877 878 # give the client machine a chance to recover from a crash 879 self.host.wait_up( 880 self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600) 881 logging.debug('Unexpected final status message from ' 882 'client %s: %s', self.host.hostname, last) 883 # The line 'last' may have sensitive phrases, like 884 # 'END GOOD', which breaks the tko parser. So the error 885 # message will exclude it, since it will be recorded to 886 # status.log. 887 msg = ("Aborting - unexpected final status message from " 888 "client on %s\n") % self.host.hostname 889 raise error.AutotestRunError(msg) 890 finally: 891 logging.debug('Autotest job finishes running. Below is the ' 892 'post-processing operations.') 893 logger.close() 894 collector.collect_client_job_results() 895 collector.remove_redundant_client_logs() 896 state_file = os.path.basename(self.remote_control_file 897 + '.state') 898 state_path = os.path.join(self.results_dir, state_file) 899 self.host.job.postprocess_client_state(state_path) 900 self.host.job.remove_client_log(hostname, remote_results, 901 local_results) 902 job_record_context.restore() 903 904 logging.debug('Autotest job finishes.') 905 906 # should only get here if we timed out 907 assert timeout 908 raise error.AutotestTimeoutError() 909 910 911class log_collector(object): 912 def __init__(self, host, client_tag, results_dir): 913 self.host = host 914 if not client_tag: 915 client_tag = "default" 916 self.client_results_dir = os.path.join(host.get_autodir(), "results", 917 client_tag) 918 self.server_results_dir = results_dir 919 920 921 def collect_client_job_results(self): 922 """ A method that collects all the current results of a running 923 client job into the results dir. By default does nothing as no 924 client job is running, but when running a client job you can override 925 this with something that will actually do something. """ 926 # make an effort to wait for the machine to come up 927 try: 928 self.host.wait_up(timeout=30) 929 except error.AutoservError: 930 # don't worry about any errors, we'll try and 931 # get the results anyway 932 pass 933 934 # Copy all dirs in default to results_dir 935 try: 936 # Build test result directory summary 937 result_tools_runner.run_on_client( 938 self.host, self.client_results_dir, 939 ENABLE_RESULT_THROTTLING) 940 941 with metrics.SecondsTimer( 942 'chromeos/autotest/job/log_collection_duration', 943 fields={'dut_host_name': self.host.hostname}): 944 self.host.get_file( 945 self.client_results_dir + '/', 946 self.server_results_dir, 947 preserve_symlinks=True) 948 except Exception: 949 # well, don't stop running just because we couldn't get logs 950 e_msg = "Unexpected error copying test result logs, continuing ..." 951 logging.error(e_msg) 952 traceback.print_exc(file=sys.stdout) 953 954 955 def remove_redundant_client_logs(self): 956 """Remove client.*.log files in favour of client.*.DEBUG files.""" 957 debug_dir = os.path.join(self.server_results_dir, 'debug') 958 debug_files = [f for f in os.listdir(debug_dir) 959 if re.search(r'^client\.\d+\.DEBUG$', f)] 960 for debug_file in debug_files: 961 log_file = debug_file.replace('DEBUG', 'log') 962 log_file = os.path.join(debug_dir, log_file) 963 if os.path.exists(log_file): 964 os.remove(log_file) 965 966 967# a file-like object for catching stderr from an autotest client and 968# extracting status logs from it 969class BaseClientLogger(object): 970 """Partial file object to write to both stdout and 971 the status log file. We only implement those methods 972 utils.run() actually calls. 973 """ 974 status_parser = re.compile(r"^AUTOTEST_STATUS:([^:]*):(.*)$") 975 test_complete_parser = re.compile(r"^AUTOTEST_TEST_COMPLETE:(.*)$") 976 fetch_package_parser = re.compile( 977 r"^AUTOTEST_FETCH_PACKAGE:([^:]*):([^:]*):(.*)$") 978 extract_indent = re.compile(r"^(\t*).*$") 979 extract_timestamp = re.compile(r".*\ttimestamp=(\d+)\t.*$") 980 981 def __init__(self, host, tag, server_results_dir): 982 self.host = host 983 self.job = host.job 984 self.log_collector = log_collector(host, tag, server_results_dir) 985 self.leftover = "" 986 self.last_line = "" 987 self.logs = {} 988 989 990 def _process_log_dict(self, log_dict): 991 log_list = log_dict.pop("logs", []) 992 for key in sorted(log_dict.iterkeys()): 993 log_list += self._process_log_dict(log_dict.pop(key)) 994 return log_list 995 996 997 def _process_logs(self): 998 """Go through the accumulated logs in self.log and print them 999 out to stdout and the status log. Note that this processes 1000 logs in an ordering where: 1001 1002 1) logs to different tags are never interleaved 1003 2) logs to x.y come before logs to x.y.z for all z 1004 3) logs to x.y come before x.z whenever y < z 1005 1006 Note that this will in general not be the same as the 1007 chronological ordering of the logs. However, if a chronological 1008 ordering is desired that one can be reconstructed from the 1009 status log by looking at timestamp lines.""" 1010 log_list = self._process_log_dict(self.logs) 1011 for entry in log_list: 1012 self.job.record_entry(entry, log_in_subdir=False) 1013 if log_list: 1014 self.last_line = log_list[-1].render() 1015 1016 1017 def _process_quoted_line(self, tag, line): 1018 """Process a line quoted with an AUTOTEST_STATUS flag. If the 1019 tag is blank then we want to push out all the data we've been 1020 building up in self.logs, and then the newest line. If the 1021 tag is not blank, then push the line into the logs for handling 1022 later.""" 1023 entry = base_job.status_log_entry.parse(line) 1024 if entry is None: 1025 return # the line contains no status lines 1026 if tag == "": 1027 self._process_logs() 1028 self.job.record_entry(entry, log_in_subdir=False) 1029 self.last_line = line 1030 else: 1031 tag_parts = [int(x) for x in tag.split(".")] 1032 log_dict = self.logs 1033 for part in tag_parts: 1034 log_dict = log_dict.setdefault(part, {}) 1035 log_list = log_dict.setdefault("logs", []) 1036 log_list.append(entry) 1037 1038 1039 def _process_info_line(self, line): 1040 """Check if line is an INFO line, and if it is, interpret any control 1041 messages (e.g. enabling/disabling warnings) that it may contain.""" 1042 match = re.search(r"^\t*INFO\t----\t----(.*)\t[^\t]*$", line) 1043 if not match: 1044 return # not an INFO line 1045 for field in match.group(1).split('\t'): 1046 if field.startswith("warnings.enable="): 1047 func = self.job.warning_manager.enable_warnings 1048 elif field.startswith("warnings.disable="): 1049 func = self.job.warning_manager.disable_warnings 1050 else: 1051 continue 1052 warning_type = field.split("=", 1)[1] 1053 func(warning_type) 1054 1055 1056 def _process_line(self, line): 1057 """Write out a line of data to the appropriate stream. Status 1058 lines sent by autotest will be prepended with 1059 "AUTOTEST_STATUS", and all other lines are ssh error 1060 messages.""" 1061 status_match = self.status_parser.search(line) 1062 test_complete_match = self.test_complete_parser.search(line) 1063 fetch_package_match = self.fetch_package_parser.search(line) 1064 if status_match: 1065 tag, line = status_match.groups() 1066 self._process_info_line(line) 1067 self._process_quoted_line(tag, line) 1068 elif test_complete_match: 1069 self._process_logs() 1070 fifo_path, = test_complete_match.groups() 1071 try: 1072 self.log_collector.collect_client_job_results() 1073 self.host.run("echo A > %s" % fifo_path) 1074 except Exception: 1075 msg = "Post-test log collection failed, continuing anyway" 1076 logging.exception(msg) 1077 elif fetch_package_match: 1078 pkg_name, dest_path, fifo_path = fetch_package_match.groups() 1079 serve_packages = global_config.global_config.get_config_value( 1080 "PACKAGES", "serve_packages_from_autoserv", type=bool) 1081 if serve_packages and pkg_name.endswith(".tar.bz2"): 1082 try: 1083 self._send_tarball(pkg_name, dest_path) 1084 except Exception: 1085 msg = "Package tarball creation failed, continuing anyway" 1086 logging.exception(msg) 1087 try: 1088 self.host.run("echo B > %s" % fifo_path) 1089 except Exception: 1090 msg = "Package tarball installation failed, continuing anyway" 1091 logging.exception(msg) 1092 else: 1093 logging.info(line) 1094 1095 1096 def _send_tarball(self, pkg_name, remote_dest): 1097 name, pkg_type = self.job.pkgmgr.parse_tarball_name(pkg_name) 1098 src_dirs = [] 1099 if pkg_type == 'test': 1100 for test_dir in ['site_tests', 'tests']: 1101 src_dir = os.path.join(self.job.clientdir, test_dir, name) 1102 if os.path.exists(src_dir): 1103 src_dirs += [src_dir] 1104 if AUTOSERV_PREBUILD: 1105 prebuild.setup(self.job.clientdir, src_dir) 1106 break 1107 elif pkg_type == 'profiler': 1108 src_dirs += [os.path.join(self.job.clientdir, 'profilers', name)] 1109 if AUTOSERV_PREBUILD: 1110 prebuild.setup(self.job.clientdir, src_dir) 1111 elif pkg_type == 'dep': 1112 src_dirs += [os.path.join(self.job.clientdir, 'deps', name)] 1113 elif pkg_type == 'client': 1114 return # you must already have a client to hit this anyway 1115 else: 1116 return # no other types are supported 1117 1118 # iterate over src_dirs until we find one that exists, then tar it 1119 for src_dir in src_dirs: 1120 if os.path.exists(src_dir): 1121 try: 1122 logging.info('Bundling %s into %s', src_dir, pkg_name) 1123 temp_dir = autotemp.tempdir(unique_id='autoserv-packager', 1124 dir=self.job.tmpdir) 1125 tarball_path = self.job.pkgmgr.tar_package( 1126 pkg_name, src_dir, temp_dir.name, " .") 1127 self.host.send_file(tarball_path, remote_dest) 1128 finally: 1129 temp_dir.clean() 1130 return 1131 1132 1133 def log_warning(self, msg, warning_type): 1134 """Injects a WARN message into the current status logging stream.""" 1135 timestamp = int(time.time()) 1136 if self.job.warning_manager.is_valid(timestamp, warning_type): 1137 self.job.record('WARN', None, None, msg) 1138 1139 1140 def write(self, data): 1141 # now start processing the existing buffer and the new data 1142 data = self.leftover + data 1143 lines = data.split('\n') 1144 processed_lines = 0 1145 try: 1146 # process all the buffered data except the last line 1147 # ignore the last line since we may not have all of it yet 1148 for line in lines[:-1]: 1149 self._process_line(line) 1150 processed_lines += 1 1151 finally: 1152 # save any unprocessed lines for future processing 1153 self.leftover = '\n'.join(lines[processed_lines:]) 1154 1155 1156 def flush(self): 1157 sys.stdout.flush() 1158 1159 1160 def flush_all_buffers(self): 1161 if self.leftover: 1162 self._process_line(self.leftover) 1163 self.leftover = "" 1164 self._process_logs() 1165 self.flush() 1166 1167 1168 def close(self): 1169 self.flush_all_buffers() 1170 1171 1172SiteAutotest = client_utils.import_site_class( 1173 __file__, "autotest_lib.server.site_autotest", "SiteAutotest", 1174 BaseAutotest) 1175 1176 1177_SiteRun = client_utils.import_site_class( 1178 __file__, "autotest_lib.server.site_autotest", "_SiteRun", _BaseRun) 1179 1180 1181SiteClientLogger = client_utils.import_site_class( 1182 __file__, "autotest_lib.server.site_autotest", "SiteClientLogger", 1183 BaseClientLogger) 1184 1185 1186class Autotest(SiteAutotest): 1187 pass 1188 1189 1190class client_logger(SiteClientLogger): 1191 pass 1192 1193 1194class _Run(_SiteRun): 1195 pass 1196