1#!/usr/bin/python2 -u 2# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc. 3# Released under the GPL v2 4 5""" 6Run a control file through the server side engine 7""" 8 9import datetime 10import contextlib 11import getpass 12import logging 13import os 14import re 15import shutil 16import signal 17import socket 18import sys 19import traceback 20import time 21import urllib2 22 23import common 24from autotest_lib.client.bin.result_tools import utils as result_utils 25from autotest_lib.client.bin.result_tools import view as result_view 26from autotest_lib.client.common_lib import control_data 27from autotest_lib.client.common_lib import enum 28from autotest_lib.client.common_lib import error 29from autotest_lib.client.common_lib import global_config 30from autotest_lib.client.common_lib import host_queue_entry_states 31from autotest_lib.client.common_lib import host_states 32from autotest_lib.server import results_mocker 33from autotest_lib.server.cros.dynamic_suite import suite 34 35try: 36 from chromite.lib import metrics 37 from chromite.lib import cloud_trace 38except ImportError: 39 from autotest_lib.client.common_lib import utils as common_utils 40 metrics = common_utils.metrics_mock 41 import mock 42 cloud_trace = mock.MagicMock() 43 44_CONFIG = global_config.global_config 45 46# Number of seconds to wait before returning if testing mode is enabled 47TESTING_MODE_SLEEP_SECS = 1 48 49 50from autotest_lib.server import frontend 51from autotest_lib.server import server_logging_config 52from autotest_lib.server import server_job, utils, autoserv_parser, autotest 53from autotest_lib.server import utils as server_utils 54from autotest_lib.server import site_utils 55from autotest_lib.server.cros.dynamic_suite import frontend_wrappers 56from autotest_lib.site_utils import job_directories 57from autotest_lib.site_utils import lxc 58from autotest_lib.site_utils.lxc import utils as lxc_utils 59from autotest_lib.client.common_lib import pidfile, logging_manager 60 61 62# Control segment to stage server-side package. 63STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path( 64 'stage_server_side_package') 65 66# Command line to start servod in a moblab. 67START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s' 68STOP_SERVOD_CMD = 'sudo stop servod' 69 70_AUTOTEST_ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..')) 71_CONTROL_FILE_FROM_CONTROL_NAME = 'control.from_control_name' 72 73_LXC_JOB_FOLDER = 'lxc_job_folder' 74 75def log_alarm(signum, frame): 76 logging.error("Received SIGALARM. Ignoring and continuing on.") 77 sys.exit(1) 78 79 80def _get_machines(parser): 81 """Get a list of machine names from command line arg -m or a file. 82 83 @param parser: Parser for the command line arguments. 84 85 @return: A list of machine names from command line arg -m or the 86 machines file specified in the command line arg -M. 87 """ 88 if parser.options.machines: 89 machines = parser.options.machines.replace(',', ' ').strip().split() 90 else: 91 machines = [] 92 machines_file = parser.options.machines_file 93 if machines_file: 94 machines = [] 95 for m in open(machines_file, 'r').readlines(): 96 # remove comments, spaces 97 m = re.sub('#.*', '', m).strip() 98 if m: 99 machines.append(m) 100 logging.debug('Read list of machines from file: %s', machines_file) 101 logging.debug('Machines: %s', ','.join(machines)) 102 103 if machines: 104 for machine in machines: 105 if not machine or re.search('\s', machine): 106 parser.parser.error("Invalid machine: %s" % str(machine)) 107 machines = list(set(machines)) 108 machines.sort() 109 return machines 110 111 112def _stage_ssp(parser, resultsdir): 113 """Stage server-side package. 114 115 This function calls a control segment to stage server-side package based on 116 the job and autoserv command line option. The detail implementation could 117 be different for each host type. Currently, only CrosHost has 118 stage_server_side_package function defined. 119 The script returns None if no server-side package is available. However, 120 it may raise exception if it failed for reasons other than artifact (the 121 server-side package) not found. 122 123 @param parser: Command line arguments parser passed in the autoserv process. 124 @param resultsdir: Folder to store results. This could be different from 125 parser.options.results: parser.options.results can be set to None 126 for results to be stored in a temp folder. resultsdir can be None 127 for autoserv run requires no logging. 128 129 @return: url to the autotest server-side package. None in case of errors. 130 """ 131 machines_list = _get_machines(parser) 132 machines_list = server_job.get_machine_dicts( 133 machine_names=machines_list, 134 store_dir=os.path.join(resultsdir, parser.options.host_info_subdir), 135 in_lab=parser.options.lab, 136 use_shadow_store=not parser.options.local_only_host_info, 137 host_attributes=parser.options.host_attributes, 138 ) 139 140 namespace = {'machines': machines_list, 141 'isolate_hash': parser.options.isolate, 142 'image': parser.options.test_source_build} 143 script_locals = {} 144 execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, namespace, script_locals) 145 ssp_url = script_locals['ssp_url'] 146 if not ssp_url: 147 logging.error('Failed to stage SSP package: %s', 148 script_locals['error_msg']) 149 logging.error('This job will fail later, when attempting to run with' 150 ' SSP') 151 return ssp_url 152 153 154def _run_with_ssp(job, container_id, job_id, results, parser, ssp_url, 155 machines): 156 """Run the server job with server-side packaging. 157 158 @param job: The server job object. 159 @param container_id: ID of the container to run the test. 160 @param job_id: ID of the test job. 161 @param results: Folder to store results. This could be different from 162 parser.options.results: 163 parser.options.results can be set to None for results to be 164 stored in a temp folder. 165 results can be None for autoserv run requires no logging. 166 @param parser: Command line parser that contains the options. 167 @param ssp_url: url of the staged server-side package. 168 @param machines: A list of machines to run the test. 169 """ 170 if not ssp_url: 171 job.record('FAIL', None, None, 172 'Failed to stage server-side package') 173 raise error.AutoservError('Failed to stage server-side package') 174 175 bucket = lxc.ContainerBucket() 176 control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != '' 177 else None) 178 try: 179 dut_name = machines[0] if len(machines) >= 1 else None 180 test_container = bucket.setup_test(container_id, job_id, ssp_url, 181 results, control=control, 182 job_folder=_LXC_JOB_FOLDER, 183 dut_name=dut_name, 184 isolate_hash=parser.options.isolate) 185 except Exception as e: 186 job.record('FAIL', None, None, 187 'Failed to setup container for test: %s. Check logs in ' 188 'ssp_logs folder for more details.' % e) 189 raise 190 191 args = sys.argv[:] 192 args.remove('--require-ssp') 193 # --parent_job_id is only useful in autoserv running in host, not in 194 # container. Include this argument will cause test to fail for builds before 195 # CL 286265 was merged. 196 if '--parent_job_id' in args: 197 index = args.index('--parent_job_id') 198 args.remove('--parent_job_id') 199 # Remove the actual parent job id in command line arg. 200 del args[index] 201 202 # A dictionary of paths to replace in the command line. Key is the path to 203 # be replaced with the one in value. 204 paths_to_replace = {} 205 # Replace the control file path with the one in container. 206 if control: 207 container_control_filename = os.path.join( 208 lxc.CONTROL_TEMP_PATH, os.path.basename(control)) 209 paths_to_replace[control] = container_control_filename 210 # Update result directory with the one in container. 211 container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % _LXC_JOB_FOLDER) 212 if parser.options.results: 213 paths_to_replace[parser.options.results] = container_result_dir 214 args = [paths_to_replace.get(arg, arg) for arg in args] 215 216 # Apply --use-existing-results, results directory is aready created and 217 # mounted in container. Apply this arg to avoid exception being raised. 218 if not '--use-existing-results' in args: 219 args.append('--use-existing-results') 220 221 # Make sure autoserv running in container using a different pid file. 222 if not '--pidfile-label' in args: 223 args.extend(['--pidfile-label', 'container_autoserv']) 224 225 cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args]) 226 logging.info('Run command in container: %s', cmd_line) 227 success = False 228 try: 229 test_container.attach_run(cmd_line) 230 success = True 231 except Exception as e: 232 # If the test run inside container fails without generating any log, 233 # write a message to status.log to help troubleshooting. 234 debug_files = os.listdir(os.path.join(results, 'debug')) 235 if not debug_files: 236 job.record('FAIL', None, None, 237 'Failed to run test inside the container: %s. Check ' 238 'logs in ssp_logs folder for more details.' % e) 239 raise 240 finally: 241 metrics.Counter( 242 'chromeos/autotest/experimental/execute_job_in_ssp').increment( 243 fields={'success': success}) 244 test_container.destroy() 245 246 247def correct_results_folder_permission(results): 248 """Make sure the results folder has the right permission settings. 249 250 For tests running with server-side packaging, the results folder has the 251 owner of root. This must be changed to the user running the autoserv 252 process, so parsing job can access the results folder. 253 TODO(dshi): crbug.com/459344 Remove this function when test container can be 254 unprivileged container. 255 256 @param results: Path to the results folder. 257 258 """ 259 if not results: 260 return 261 262 utils.run('sudo -n chown -R %s "%s"' % (os.getuid(), results)) 263 utils.run('sudo -n chgrp -R %s "%s"' % (os.getgid(), results)) 264 265 266def _start_servod(machine): 267 """Try to start servod in moblab if it's not already running or running with 268 different board or port. 269 270 @param machine: Name of the dut used for test. 271 """ 272 if not utils.is_moblab(): 273 return 274 275 logging.debug('Trying to start servod.') 276 try: 277 afe = frontend.AFE() 278 board = server_utils.get_board_from_afe(machine, afe) 279 hosts = afe.get_hosts(hostname=machine) 280 servo_host = hosts[0].attributes.get('servo_host', None) 281 servo_port = hosts[0].attributes.get('servo_port', 9999) 282 if not servo_host in ['localhost', '127.0.0.1']: 283 logging.warn('Starting servod is aborted. The dut\'s servo_host ' 284 'attribute is not set to localhost.') 285 return 286 except (urllib2.HTTPError, urllib2.URLError): 287 # Ignore error if RPC failed to get board 288 logging.error('Failed to get board name from AFE. Start servod is ' 289 'aborted') 290 return 291 292 try: 293 pid = utils.run('pgrep servod').stdout 294 cmd_line = utils.run('ps -fp %s' % pid).stdout 295 if ('--board %s' % board in cmd_line and 296 '--port %s' % servo_port in cmd_line): 297 logging.debug('Servod is already running with given board and port.' 298 ' There is no need to restart servod.') 299 return 300 logging.debug('Servod is running with different board or port. ' 301 'Stopping existing servod.') 302 utils.run('sudo stop servod') 303 except error.CmdError: 304 # servod is not running. 305 pass 306 307 try: 308 utils.run(START_SERVOD_CMD % (board, servo_port)) 309 logging.debug('Servod is started') 310 except error.CmdError as e: 311 logging.error('Servod failed to be started, error: %s', e) 312 313 314def _control_path_on_disk(control_name): 315 """Find the control file corresponding to the given control name, on disk. 316 317 @param control_name: NAME attribute of the control file to fetch. 318 @return: Path to the control file. 319 """ 320 cf_getter = suite.create_fs_getter(_AUTOTEST_ROOT) 321 control_name_predicate = suite.test_name_matches_pattern_predicate( 322 '^%s$' % control_name) 323 tests = suite.find_and_parse_tests(cf_getter, control_name_predicate) 324 if not tests: 325 raise error.AutoservError( 326 'Failed to find any control files with NAME %s' % control_name) 327 if len(tests) > 1: 328 logging.error('Found more than one control file with NAME %s: %s', 329 control_name, [t.path for t in tests]) 330 raise error.AutoservError( 331 'Found more than one control file with NAME %s' % control_name) 332 return tests[0].path 333 334 335def _stage_control_file(control_name, results_dir): 336 """Stage the control file to execute from local autotest checkout. 337 338 @param control_name: Name of the control file to stage. 339 @param results_dir: Results directory to stage the control file into. 340 @return: Absolute path to the staged control file. 341 """ 342 control_path = _control_path_on_disk(control_name) 343 new_control = os.path.join(results_dir, _CONTROL_FILE_FROM_CONTROL_NAME) 344 shutil.copy2(control_path, new_control) 345 return new_control 346 347 348def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp): 349 """Run server job with given options. 350 351 @param pid_file_manager: PidFileManager used to monitor the autoserv process 352 @param results: Folder to store results. 353 @param parser: Parser for the command line arguments. 354 @param ssp_url: Url to server-side package. 355 @param use_ssp: Set to True to run with server-side packaging. 356 """ 357 # send stdin to /dev/null 358 dev_null = os.open(os.devnull, os.O_RDONLY) 359 os.dup2(dev_null, sys.stdin.fileno()) 360 os.close(dev_null) 361 362 # Create separate process group if the process is not a process group 363 # leader. This allows autoserv process to keep running after the caller 364 # process (drone manager call) exits. 365 if os.getpid() != os.getpgid(0): 366 os.setsid() 367 368 # Container name is predefined so the container can be destroyed in 369 # handle_sigterm. 370 job_or_task_id = job_directories.get_job_id_or_task_id( 371 parser.options.results) 372 container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid()) 373 374 # Implement SIGTERM handler 375 def handle_sigterm(signum, frame): 376 logging.debug('Received SIGTERM') 377 if pid_file_manager: 378 pid_file_manager.close_file(1, signal.SIGTERM) 379 logging.debug('Finished writing to pid_file. Killing process.') 380 381 # Update results folder's file permission. This needs to be done ASAP 382 # before the parsing process tries to access the log. 383 if use_ssp and results: 384 correct_results_folder_permission(results) 385 386 # This sleep allows the pending output to be logged before the kill 387 # signal is sent. 388 time.sleep(.1) 389 if use_ssp: 390 logging.debug('Destroy container %s before aborting the autoserv ' 391 'process.', container_id) 392 try: 393 bucket = lxc.ContainerBucket() 394 container = bucket.get_container(container_id) 395 if container: 396 container.destroy() 397 logging.debug("Container %s destroyed.", container_id) 398 else: 399 logging.debug('Container %s is not found.', container_id) 400 bucket.scrub_container_location(container_id) 401 except: 402 # Handle any exception so the autoserv process can be aborted. 403 logging.exception('Failed to destroy container %s.', 404 container_id) 405 # Try to correct the result file permission again after the 406 # container is destroyed, as the container might have created some 407 # new files in the result folder. 408 if results: 409 correct_results_folder_permission(results) 410 411 os.killpg(os.getpgrp(), signal.SIGKILL) 412 413 # Set signal handler 414 signal.signal(signal.SIGTERM, handle_sigterm) 415 416 # faulthandler is only needed to debug in the Lab and is not avaliable to 417 # be imported in the chroot as part of VMTest, so Try-Except it. 418 try: 419 import faulthandler 420 faulthandler.register(signal.SIGTERM, all_threads=True, chain=True) 421 logging.debug('faulthandler registered on SIGTERM.') 422 except ImportError: 423 sys.exc_clear() 424 425 # Ignore SIGTTOU's generated by output from forked children. 426 signal.signal(signal.SIGTTOU, signal.SIG_IGN) 427 428 # If we received a SIGALARM, let's be loud about it. 429 signal.signal(signal.SIGALRM, log_alarm) 430 431 # Server side tests that call shell scripts often depend on $USER being set 432 # but depending on how you launch your autotest scheduler it may not be set. 433 os.environ['USER'] = getpass.getuser() 434 435 label = parser.options.label 436 group_name = parser.options.group_name 437 user = parser.options.user 438 client = parser.options.client 439 server = parser.options.server 440 verify = parser.options.verify 441 repair = parser.options.repair 442 cleanup = parser.options.cleanup 443 provision = parser.options.provision 444 reset = parser.options.reset 445 job_labels = parser.options.job_labels 446 no_tee = parser.options.no_tee 447 execution_tag = parser.options.execution_tag 448 ssh_user = parser.options.ssh_user 449 ssh_port = parser.options.ssh_port 450 ssh_pass = parser.options.ssh_pass 451 collect_crashinfo = parser.options.collect_crashinfo 452 control_filename = parser.options.control_filename 453 verify_job_repo_url = parser.options.verify_job_repo_url 454 skip_crash_collection = parser.options.skip_crash_collection 455 ssh_verbosity = int(parser.options.ssh_verbosity) 456 ssh_options = parser.options.ssh_options 457 no_use_packaging = parser.options.no_use_packaging 458 in_lab = bool(parser.options.lab) 459 460 # can't be both a client and a server side test 461 if client and server: 462 parser.parser.error("Can not specify a test as both server and client!") 463 464 if provision and client: 465 parser.parser.error("Cannot specify provisioning and client!") 466 467 is_special_task = (verify or repair or cleanup or collect_crashinfo or 468 provision or reset) 469 use_client_trampoline = False 470 if parser.options.control_name: 471 if use_ssp: 472 # When use_ssp is True, autoserv will be re-executed inside a 473 # container preserving the --control-name argument. Control file 474 # will be staged inside the rexecuted autoserv. 475 control = None 476 else: 477 try: 478 control = _stage_control_file(parser.options.control_name, 479 results) 480 except error.AutoservError as e: 481 logging.info("Using client trampoline because of: %s", e) 482 control = parser.options.control_name 483 use_client_trampoline = True 484 485 elif parser.args: 486 control = parser.args[0] 487 else: 488 if not is_special_task: 489 parser.parser.error("Missing argument: control file") 490 control = None 491 492 if ssh_verbosity > 0: 493 # ssh_verbosity is an integer between 0 and 3, inclusive 494 ssh_verbosity_flag = '-' + 'v' * ssh_verbosity 495 else: 496 ssh_verbosity_flag = '' 497 498 machines = _get_machines(parser) 499 if group_name and len(machines) < 2: 500 parser.parser.error('-G %r may only be supplied with more than one ' 501 'machine.' % group_name) 502 503 logging.debug("Parser.args is %r", parser.args) 504 try: 505 logging.debug("Parser.options.args is %r", parser.options.args) 506 except AttributeError: 507 logging.debug("No Parser.options.args.") 508 509 job_kwargs = { 510 'control': control, 511 'args': parser.args[1:], 512 'resultdir': results, 513 'label': label, 514 'user': user, 515 'machines': machines, 516 'machine_dict_list': server_job.get_machine_dicts( 517 machine_names=machines, 518 store_dir=os.path.join(results, 519 parser.options.host_info_subdir), 520 in_lab=in_lab, 521 use_shadow_store=not parser.options.local_only_host_info, 522 host_attributes=parser.options.host_attributes, 523 ), 524 'client': client, 525 'ssh_user': ssh_user, 526 'ssh_port': ssh_port, 527 'ssh_pass': ssh_pass, 528 'ssh_verbosity_flag': ssh_verbosity_flag, 529 'ssh_options': ssh_options, 530 'group_name': group_name, 531 'tag': execution_tag, 532 'disable_sysinfo': parser.options.disable_sysinfo, 533 'in_lab': in_lab, 534 'use_client_trampoline': use_client_trampoline, 535 } 536 if parser.options.parent_job_id: 537 job_kwargs['parent_job_id'] = int(parser.options.parent_job_id) 538 if control_filename: 539 job_kwargs['control_filename'] = control_filename 540 job = server_job.server_job(**job_kwargs) 541 542 job.logging.start_logging() 543 544 # perform checks 545 job.precheck() 546 547 # run the job 548 exit_code = 0 549 auto_start_servod = _CONFIG.get_config_value( 550 'AUTOSERV', 'auto_start_servod', type=bool, default=False) 551 552 site_utils.SetupTsMonGlobalState('autoserv', indirect=False, 553 short_lived=True) 554 try: 555 try: 556 if repair: 557 if auto_start_servod and len(machines) == 1: 558 _start_servod(machines[0]) 559 job.repair(job_labels) 560 elif verify: 561 job.verify(job_labels) 562 elif provision: 563 job.provision(job_labels) 564 elif reset: 565 job.reset(job_labels) 566 elif cleanup: 567 job.cleanup(job_labels) 568 else: 569 if auto_start_servod and len(machines) == 1: 570 _start_servod(machines[0]) 571 if use_ssp: 572 try: 573 _run_with_ssp(job, container_id, job_or_task_id, 574 results, parser, ssp_url, machines) 575 finally: 576 # Update the ownership of files in result folder. 577 correct_results_folder_permission(results) 578 else: 579 if collect_crashinfo: 580 # Update the ownership of files in result folder. If the 581 # job to collect crashinfo was running inside container 582 # (SSP) and crashed before correcting folder permission, 583 # the result folder might have wrong permission setting. 584 try: 585 correct_results_folder_permission(results) 586 except: 587 # Ignore any error as the user may not have root 588 # permission to run sudo command. 589 pass 590 metric_name = ('chromeos/autotest/experimental/' 591 'autoserv_job_run_duration') 592 f = {'in_container': utils.is_in_container(), 593 'success': False} 594 with metrics.SecondsTimer(metric_name, fields=f) as c: 595 job.run(verify_job_repo_url=verify_job_repo_url, 596 only_collect_crashinfo=collect_crashinfo, 597 skip_crash_collection=skip_crash_collection, 598 job_labels=job_labels, 599 use_packaging=(not no_use_packaging)) 600 c['success'] = True 601 602 finally: 603 job.close() 604 # Special task doesn't run parse, so result summary needs to be 605 # built here. 606 if results and (repair or verify or reset or cleanup or provision): 607 # Throttle the result on the server side. 608 try: 609 result_utils.execute( 610 results, control_data.DEFAULT_MAX_RESULT_SIZE_KB) 611 except: 612 logging.exception( 613 'Non-critical failure: Failed to throttle results ' 614 'in directory %s.', results) 615 # Build result view and report metrics for result sizes. 616 site_utils.collect_result_sizes(results) 617 except: 618 exit_code = 1 619 traceback.print_exc() 620 finally: 621 metrics.Flush() 622 623 sys.exit(exit_code) 624 625 626# Job breakdown statuses 627_hs = host_states.Status 628_qs = host_queue_entry_states.Status 629_status_list = [ 630 _qs.QUEUED, _qs.RESETTING, _qs.VERIFYING, 631 _qs.PROVISIONING, _hs.REPAIRING, _qs.CLEANING, 632 _qs.RUNNING, _qs.GATHERING, _qs.PARSING] 633_JOB_OVERHEAD_STATUS = enum.Enum(*_status_list, string_values=True) 634 635 636def get_job_status(options): 637 """Returns the HQE Status for this run. 638 639 @param options: parser options. 640 """ 641 s = _JOB_OVERHEAD_STATUS 642 task_mapping = { 643 'reset': s.RESETTING, 'verify': s.VERIFYING, 644 'provision': s.PROVISIONING, 'repair': s.REPAIRING, 645 'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING} 646 match = [task for task in task_mapping if getattr(options, task, False)] 647 return task_mapping[match[0]] if match else s.RUNNING 648 649 650def _require_ssp_from_control(control_name): 651 """Read the value of REQUIRE_SSP from test control file. 652 653 This reads the control file from the prod checkout of autotest and uses that 654 to determine whether to even stage the SSP package on a devserver. 655 656 This means: 657 [1] Any change in REQUIRE_SSP directive in a test requires a prod-push to go 658 live. 659 [2] This function may find that the control file does not exist but the SSP 660 package may contain the test file. This function conservatively returns True 661 in that case. 662 663 This function is called very early in autoserv, before logging is setup. 664 """ 665 if not control_name: 666 return True 667 try: 668 path = _control_path_on_disk(control_name) 669 except error.AutoservError as e: 670 sys.stderr.write("autoserv: Could not determine control file path," 671 " assuming we need SSP: %s\n" % e) 672 sys.stderr.flush() 673 return True 674 if not os.path.isfile(path): 675 return True 676 control = control_data.parse_control(path) 677 # There must be explicit directive in the control file to disable SSP. 678 if not control or control.require_ssp is None: 679 return True 680 return control.require_ssp 681 682 683def main(): 684 start_time = datetime.datetime.now() 685 parser = autoserv_parser.autoserv_parser 686 parser.parse_args() 687 688 if len(sys.argv) == 1: 689 parser.parser.print_help() 690 sys.exit(1) 691 692 if parser.options.no_logging: 693 results = None 694 else: 695 results = parser.options.results 696 if not results: 697 results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S') 698 results = os.path.abspath(results) 699 resultdir_exists = False 700 for filename in ('control.srv', 'status.log', '.autoserv_execute'): 701 if os.path.exists(os.path.join(results, filename)): 702 resultdir_exists = True 703 if not parser.options.use_existing_results and resultdir_exists: 704 error = "Error: results directory already exists: %s\n" % results 705 sys.stderr.write(error) 706 sys.exit(1) 707 708 # Now that we certified that there's no leftover results dir from 709 # previous jobs, lets create the result dir since the logging system 710 # needs to create the log file in there. 711 if not os.path.isdir(results): 712 os.makedirs(results) 713 714 if parser.options.require_ssp: 715 # This is currently only used for skylab (i.e., when --control-name is 716 # used). 717 use_ssp = _require_ssp_from_control(parser.options.control_name) 718 else: 719 use_ssp = False 720 721 722 if use_ssp: 723 log_dir = os.path.join(results, 'ssp_logs') if results else None 724 if log_dir and not os.path.exists(log_dir): 725 os.makedirs(log_dir) 726 else: 727 log_dir = results 728 729 logging_manager.configure_logging( 730 server_logging_config.ServerLoggingConfig(), 731 results_dir=log_dir, 732 use_console=not parser.options.no_tee, 733 verbose=parser.options.verbose, 734 no_console_prefix=parser.options.no_console_prefix) 735 736 logging.debug('autoserv is running in drone %s.', socket.gethostname()) 737 logging.debug('autoserv environment: %r', os.environ) 738 logging.debug('autoserv command was: %s', ' '.join(sys.argv)) 739 logging.debug('autoserv parsed options: %s', parser.options) 740 741 if use_ssp: 742 ssp_url = _stage_ssp(parser, results) 743 else: 744 ssp_url = None 745 746 if results: 747 logging.info("Results placed in %s" % results) 748 749 # wait until now to perform this check, so it get properly logged 750 if (parser.options.use_existing_results and not resultdir_exists and 751 not utils.is_in_container()): 752 logging.error("No existing results directory found: %s", results) 753 sys.exit(1) 754 755 if parser.options.write_pidfile and results: 756 pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label, 757 results) 758 pid_file_manager.open_file() 759 else: 760 pid_file_manager = None 761 762 autotest.Autotest.set_install_in_tmpdir( 763 parser.options.install_in_tmpdir) 764 765 exit_code = 0 766 # TODO(beeps): Extend this to cover different failure modes. 767 # Testing exceptions are matched against labels sent to autoserv. Eg, 768 # to allow only the hostless job to run, specify 769 # testing_exceptions: test_suite in the shadow_config. To allow both 770 # the hostless job and dummy_Pass to run, specify 771 # testing_exceptions: test_suite,dummy_Pass. You can figure out 772 # what label autoserv is invoked with by looking through the logs of a test 773 # for the autoserv command's -l option. 774 testing_exceptions = _CONFIG.get_config_value( 775 'AUTOSERV', 'testing_exceptions', type=list, default=[]) 776 test_mode = _CONFIG.get_config_value( 777 'AUTOSERV', 'testing_mode', type=bool, default=False) 778 test_mode = (results_mocker and test_mode and not 779 any([ex in parser.options.label 780 for ex in testing_exceptions])) 781 is_task = (parser.options.verify or parser.options.repair or 782 parser.options.provision or parser.options.reset or 783 parser.options.cleanup or parser.options.collect_crashinfo) 784 785 trace_labels = { 786 'job_id': job_directories.get_job_id_or_task_id( 787 parser.options.results) 788 } 789 trace = cloud_trace.SpanStack( 790 labels=trace_labels, 791 global_context=parser.options.cloud_trace_context) 792 trace.enabled = parser.options.cloud_trace_context_enabled == 'True' 793 try: 794 try: 795 if test_mode: 796 # The parser doesn't run on tasks anyway, so we can just return 797 # happy signals without faking results. 798 if not is_task: 799 machine = parser.options.results.split('/')[-1] 800 801 # TODO(beeps): The proper way to do this would be to 802 # refactor job creation so we can invoke job.record 803 # directly. To do that one needs to pipe the test_name 804 # through run_autoserv and bail just before invoking 805 # the server job. See the comment in 806 # puppylab/results_mocker for more context. 807 results_mocker.ResultsMocker( 808 'unknown-test', parser.options.results, machine 809 ).mock_results() 810 return 811 else: 812 with trace.Span(get_job_status(parser.options)): 813 run_autoserv(pid_file_manager, results, parser, ssp_url, 814 use_ssp) 815 except SystemExit as e: 816 exit_code = e.code 817 if exit_code: 818 logging.exception('Uncaught SystemExit with code %s', exit_code) 819 except Exception: 820 # If we don't know what happened, we'll classify it as 821 # an 'abort' and return 1. 822 logging.exception('Uncaught Exception, exit_code = 1.') 823 exit_code = 1 824 finally: 825 if pid_file_manager: 826 pid_file_manager.close_file(exit_code) 827 sys.exit(exit_code) 828 829 830if __name__ == '__main__': 831 main() 832