1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import abc 6import datetime 7import difflib 8import functools 9import hashlib 10import logging 11import operator 12import os 13import re 14import sys 15import warnings 16 17import common 18 19from autotest_lib.frontend.afe.json_rpc import proxy 20from autotest_lib.client.common_lib import control_data 21from autotest_lib.client.common_lib import enum 22from autotest_lib.client.common_lib import error 23from autotest_lib.client.common_lib import global_config 24from autotest_lib.client.common_lib import priorities 25from autotest_lib.client.common_lib import time_utils 26from autotest_lib.client.common_lib import utils 27from autotest_lib.frontend.afe import model_attributes 28from autotest_lib.frontend.afe.json_rpc import proxy 29from autotest_lib.server.cros import provision 30from autotest_lib.server.cros.dynamic_suite import constants 31from autotest_lib.server.cros.dynamic_suite import control_file_getter 32from autotest_lib.server.cros.dynamic_suite import frontend_wrappers 33from autotest_lib.server.cros.dynamic_suite import job_status 34from autotest_lib.server.cros.dynamic_suite import tools 35from autotest_lib.server.cros.dynamic_suite.job_status import Status 36 37try: 38 from chromite.lib import boolparse_lib 39 from chromite.lib import cros_logging as logging 40except ImportError: 41 print 'Unable to import chromite.' 42 print 'This script must be either:' 43 print ' - Be run in the chroot.' 
44 print ' - (not yet supported) be run after running ' 45 print ' ../utils/build_externals.py' 46 47_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta', 48 'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable', 49 'sanity', 'push_to_prod'] 50_AUTOTEST_DIR = global_config.global_config.get_config_value( 51 'SCHEDULER', 'drone_installation_directory') 52ENABLE_CONTROLS_IN_BATCH = global_config.global_config.get_config_value( 53 'CROS', 'enable_getting_controls_in_batch', type=bool, default=False) 54 55class RetryHandler(object): 56 """Maintain retry information. 57 58 @var _retry_map: A dictionary that stores retry history. 59 The key is afe job id. The value is a dictionary. 60 {job_id: {'state':RetryHandler.States, 'retry_max':int}} 61 - state: 62 The retry state of a job. 63 NOT_ATTEMPTED: 64 We haven't done anything about the job. 65 ATTEMPTED: 66 We've made an attempt to schedule a retry job. The 67 scheduling may or may not be successful, e.g. 68 it might encounter an rpc error. Note failure 69 in scheduling a retry is different from a retry job failure. 70 For each job, we only attempt to schedule a retry once. 71 For example, assume we have a test with JOB_RETRIES=5 and 72 its second retry job failed. When we attempt to create 73 a third retry job to retry the second, we hit an rpc 74 error. In such case, we will give up on all following 75 retries. 76 RETRIED: 77 A retry job has already been successfully 78 scheduled. 79 - retry_max: 80 The maximum of times the job can still 81 be retried, taking into account retries 82 that have occurred. 83 @var _retry_level: A retry might be triggered only if the result 84 is worse than the level. 85 @var _max_retries: Maximum retry limit at suite level. 86 Regardless how many times each individual test 87 has been retried, the total number of retries happening in 88 the suite can't exceed _max_retries. 
89 """ 90 91 States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED', 92 start_value=1, step=1) 93 94 def __init__(self, initial_jobs_to_tests, retry_level='WARN', 95 max_retries=None): 96 """Initialize RetryHandler. 97 98 @param initial_jobs_to_tests: A dictionary that maps a job id to 99 a ControlData object. This dictionary should contain 100 jobs that are originally scheduled by the suite. 101 @param retry_level: A retry might be triggered only if the result is 102 worse than the level. 103 @param max_retries: Integer, maxmium total retries allowed 104 for the suite. Default to None, no max. 105 """ 106 self._retry_map = {} 107 self._retry_level = retry_level 108 self._max_retries = (max_retries 109 if max_retries is not None else sys.maxint) 110 for job_id, test in initial_jobs_to_tests.items(): 111 if test.job_retries > 0: 112 self._add_job(new_job_id=job_id, 113 retry_max=test.job_retries) 114 115 116 def _add_job(self, new_job_id, retry_max): 117 """Add a newly-created job to the retry map. 118 119 @param new_job_id: The afe_job_id of a newly created job. 120 @param retry_max: The maximum of times that we could retry 121 the test if the job fails. 122 123 @raises ValueError if new_job_id is already in retry map. 124 125 """ 126 if new_job_id in self._retry_map: 127 raise ValueError('add_job called when job is already in retry map.') 128 129 self._retry_map[new_job_id] = { 130 'state': self.States.NOT_ATTEMPTED, 131 'retry_max': retry_max} 132 133 134 def _suite_max_reached(self): 135 """Return whether maximum retry limit for a suite has been reached.""" 136 return self._max_retries <= 0 137 138 139 def add_retry(self, old_job_id, new_job_id): 140 """Record a retry. 141 142 Update retry map with the retry information. 143 144 @param old_job_id: The afe_job_id of the job that is retried. 145 @param new_job_id: The afe_job_id of the retry job. 146 147 @raises KeyError if old_job_id isn't in the retry map. 
148 @raises ValueError if we have already retried or made an attempt 149 to retry the old job. 150 151 """ 152 old_record = self._retry_map[old_job_id] 153 if old_record['state'] != self.States.NOT_ATTEMPTED: 154 raise ValueError( 155 'We have already retried or attempted to retry job %d' % 156 old_job_id) 157 old_record['state'] = self.States.RETRIED 158 self._add_job(new_job_id=new_job_id, 159 retry_max=old_record['retry_max'] - 1) 160 self._max_retries -= 1 161 162 163 def set_attempted(self, job_id): 164 """Set the state of the job to ATTEMPTED. 165 166 @param job_id: afe_job_id of a job. 167 168 @raises KeyError if job_id isn't in the retry map. 169 @raises ValueError if the current state is not NOT_ATTEMPTED. 170 171 """ 172 current_state = self._retry_map[job_id]['state'] 173 if current_state != self.States.NOT_ATTEMPTED: 174 # We are supposed to retry or attempt to retry each job 175 # only once. Raise an error if this is not the case. 176 raise ValueError('Unexpected state transition: %s -> %s' % 177 (self.States.get_string(current_state), 178 self.States.get_string(self.States.ATTEMPTED))) 179 else: 180 self._retry_map[job_id]['state'] = self.States.ATTEMPTED 181 182 183 def has_following_retry(self, result): 184 """Check whether there will be a following retry. 185 186 We have the following cases for a given job id (result.id), 187 - no retry map entry -> retry not required, no following retry 188 - has retry map entry: 189 - already retried -> has following retry 190 - has not retried 191 (this branch can be handled by checking should_retry(result)) 192 - retry_max == 0 --> the last retry job, no more retry 193 - retry_max > 0 194 - attempted, but has failed in scheduling a 195 following retry due to rpc error --> no more retry 196 - has not attempped --> has following retry if test failed. 197 198 @param result: A result, encapsulating the status of the job. 199 200 @returns: True, if there will be a following retry. 201 False otherwise. 
202 203 """ 204 return (result.test_executed 205 and result.id in self._retry_map 206 and (self._retry_map[result.id]['state'] == self.States.RETRIED 207 or self._should_retry(result))) 208 209 210 def _should_retry(self, result): 211 """Check whether we should retry a job based on its result. 212 213 We will retry the job that corresponds to the result 214 when all of the following are true. 215 a) The test was actually executed, meaning that if 216 a job was aborted before it could ever reach the state 217 of 'Running', the job will not be retried. 218 b) The result is worse than |self._retry_level| which 219 defaults to 'WARN'. 220 c) The test requires retry, i.e. the job has an entry in the retry map. 221 d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED 222 Note that if a test has JOB_RETRIES=5, and the second time 223 it was retried it hit an rpc error, we will give up on 224 all following retries. 225 e) The job has not reached its retry max, i.e. retry_max > 0 226 227 @param result: A result, encapsulating the status of the job. 228 229 @returns: True if we should retry the job. 230 231 """ 232 return ( 233 result.test_executed 234 and result.id in self._retry_map 235 and not self._suite_max_reached() 236 and result.is_worse_than( 237 job_status.Status(self._retry_level, '', 'reason')) 238 and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED 239 and self._retry_map[result.id]['retry_max'] > 0 240 ) 241 242 243 def get_retry_max(self, job_id): 244 """Get the maximum times the job can still be retried. 245 246 @param job_id: afe_job_id of a job. 247 248 @returns: An int, representing the maximum times the job can still be 249 retried. 250 @raises KeyError if job_id isn't in the retry map. 
251 252 """ 253 return self._retry_map[job_id]['retry_max'] 254 255 256class _SuiteChildJobCreator(object): 257 """Create test jobs for a suite.""" 258 259 def __init__( 260 self, 261 tag, 262 builds, 263 board, 264 afe=None, 265 max_runtime_mins=24*60, 266 timeout_mins=24*60, 267 suite_job_id=None, 268 ignore_deps=False, 269 extra_deps=(), 270 priority=priorities.Priority.DEFAULT, 271 offload_failures_only=False, 272 test_source_build=None, 273 job_keyvals=None, 274 ): 275 """ 276 Constructor 277 278 @param tag: a string with which to tag jobs run in this suite. 279 @param builds: the builds on which we're running this suite. 280 @param board: the board on which we're running this suite. 281 @param afe: an instance of AFE as defined in server/frontend.py. 282 @param max_runtime_mins: Maximum suite runtime, in minutes. 283 @param timeout_mins: Maximum job lifetime, in minutes. 284 @param suite_job_id: Job id that will act as parent id to all sub jobs. 285 Default: None 286 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 287 attribute and skip applying of dependency labels. 288 (Default:False) 289 @param extra_deps: A list of strings which are the extra DEPENDENCIES 290 to add to each test being scheduled. 291 @param priority: Integer priority level. Higher is more important. 292 @param offload_failures_only: Only enable gs_offloading for failed 293 jobs. 294 @param test_source_build: Build that contains the server-side test code. 295 @param job_keyvals: General job keyvals to be inserted into keyval file, 296 which will be used by tko/parse later. 
297 """ 298 self._tag = tag 299 self._builds = builds 300 self._board = board 301 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30, 302 delay_sec=10, 303 debug=False) 304 self._max_runtime_mins = max_runtime_mins 305 self._timeout_mins = timeout_mins 306 self._suite_job_id = suite_job_id 307 self._ignore_deps = ignore_deps 308 self._extra_deps = tuple(extra_deps) 309 self._priority = priority 310 self._offload_failures_only = offload_failures_only 311 self._test_source_build = test_source_build 312 self._job_keyvals = job_keyvals 313 314 315 @property 316 def cros_build(self): 317 """Return the CrOS build or the first build in the builds dict.""" 318 # TODO(ayatane): Note that the builds dict isn't ordered. I'm not 319 # sure what the implications of this are, but it's probably not a 320 # good thing. 321 return self._builds.get(provision.CROS_VERSION_PREFIX, 322 self._builds.values()[0]) 323 324 325 def create_job(self, test, retry_for=None): 326 """ 327 Thin wrapper around frontend.AFE.create_job(). 328 329 @param test: ControlData object for a test to run. 330 @param retry_for: If the to-be-created job is a retry for an 331 old job, the afe_job_id of the old job will 332 be passed in as |retry_for|, which will be 333 recorded in the new job's keyvals. 334 @returns: A frontend.Job object with an added test_name member. 335 test_name is used to preserve the higher level TEST_NAME 336 name of the job. 337 """ 338 # For a system running multiple suites which share tests, the priority 339 # overridden may lead to unexpected scheduling order that adds extra 340 # provision jobs. 
341 test_priority = self._priority 342 if utils.is_moblab(): 343 test_priority = max(self._priority, test.priority) 344 345 reboot_before = (model_attributes.RebootBefore.NEVER if test.fast 346 else None) 347 348 test_obj = self._afe.create_job( 349 control_file=test.text, 350 name=tools.create_job_name( 351 self._test_source_build or self.cros_build, 352 self._tag, 353 test.name), 354 control_type=test.test_type.capitalize(), 355 meta_hosts=[self._board]*test.sync_count, 356 dependencies=self._create_job_deps(test), 357 keyvals=self._create_keyvals_for_test_job(test, retry_for), 358 max_runtime_mins=self._max_runtime_mins, 359 timeout_mins=self._timeout_mins, 360 parent_job_id=self._suite_job_id, 361 test_retry=test.retries, 362 reboot_before=reboot_before, 363 run_reset=not test.fast, 364 priority=test_priority, 365 synch_count=test.sync_count, 366 require_ssp=test.require_ssp) 367 368 test_obj.test_name = test.name 369 return test_obj 370 371 372 def _create_job_deps(self, test): 373 """Create job deps list for a test job. 374 375 @returns: A list of dependency strings. 376 """ 377 if self._ignore_deps: 378 job_deps = [] 379 else: 380 job_deps = list(test.dependencies) 381 job_deps.extend(self._extra_deps) 382 return job_deps 383 384 385 def _create_keyvals_for_test_job(self, test, retry_for=None): 386 """Create keyvals dict for creating a test job. 387 388 @param test: ControlData object for a test to run. 389 @param retry_for: If the to-be-created job is a retry for an 390 old job, the afe_job_id of the old job will 391 be passed in as |retry_for|, which will be 392 recorded in the new job's keyvals. 393 @returns: A keyvals dict for creating the test job. 
394 """ 395 keyvals = { 396 constants.JOB_BUILD_KEY: self.cros_build, 397 constants.JOB_SUITE_KEY: self._tag, 398 constants.JOB_EXPERIMENTAL_KEY: test.experimental, 399 constants.JOB_BUILDS_KEY: self._builds 400 } 401 # test_source_build is saved to job_keyvals so scheduler can retrieve 402 # the build name from database when compiling autoserv commandline. 403 # This avoid a database change to add a new field in afe_jobs. 404 # 405 # Only add `test_source_build` to job keyvals if the build is different 406 # from the CrOS build or the job uses more than one build, e.g., both 407 # firmware and CrOS will be updated in the dut. 408 # This is for backwards compatibility, so the update Autotest code can 409 # compile an autoserv command line to run in a SSP container using 410 # previous builds. 411 if (self._test_source_build and 412 (self.cros_build != self._test_source_build or 413 len(self._builds) > 1)): 414 keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = \ 415 self._test_source_build 416 for prefix, build in self._builds.iteritems(): 417 if prefix == provision.FW_RW_VERSION_PREFIX: 418 keyvals[constants.FWRW_BUILD]= build 419 elif prefix == provision.FW_RO_VERSION_PREFIX: 420 keyvals[constants.FWRO_BUILD] = build 421 # Add suite job id to keyvals so tko parser can read it from keyval 422 # file. 423 if self._suite_job_id: 424 keyvals[constants.PARENT_JOB_ID] = self._suite_job_id 425 # We drop the old job's id in the new job's keyval file so that 426 # later our tko parser can figure out the retry relationship and 427 # invalidate the results of the old job in tko database. 
428 if retry_for: 429 keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for 430 if self._offload_failures_only: 431 keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True 432 if self._job_keyvals: 433 for key in constants.INHERITED_KEYVALS: 434 if key in self._job_keyvals: 435 keyvals[key] = self._job_keyvals[key] 436 return keyvals 437 438 439def _get_cf_retriever(cf_getter, forgiving_parser=True, run_prod_code=False, 440 test_args=None): 441 """Return the correct _ControlFileRetriever instance. 442 443 If cf_getter is a File system ControlFileGetter, return a 444 _ControlFileRetriever. This performs a full parse of the root 445 directory associated with the getter. This is the case when it's 446 invoked from suite_preprocessor. 447 448 If cf_getter is a devserver getter, return a 449 _BatchControlFileRetriever. This looks up the suite_name in a suite 450 to control file map generated at build time, and parses the relevant 451 control files alone. This lookup happens on the devserver, so as far 452 as this method is concerned, both cases are equivalent. If 453 enable_controls_in_batch is switched on, this function will call 454 cf_getter.get_suite_info() to get a dict of control files and 455 contents in batch. 456 """ 457 if _should_batch_with(cf_getter): 458 cls = _BatchControlFileRetriever 459 else: 460 cls = _ControlFileRetriever 461 return cls(cf_getter, forgiving_parser, run_prod_code, test_args) 462 463 464def _should_batch_with(cf_getter): 465 """Return whether control files should be fetched in batch. 466 467 This depends on the control file getter and configuration options. 468 469 @param cf_getter: a control_file_getter.ControlFileGetter used to list 470 and fetch the content of control files 471 """ 472 return (ENABLE_CONTROLS_IN_BATCH 473 and isinstance(cf_getter, control_file_getter.DevServerGetter)) 474 475 476class _ControlFileRetriever(object): 477 """Retrieves control files. 
478 479 This returns control data instances, unlike control file getters 480 which simply return the control file text contents. 481 """ 482 483 def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False, 484 test_args=None): 485 """Initialize instance. 486 487 @param cf_getter: a control_file_getter.ControlFileGetter used to list 488 and fetch the content of control files 489 @param forgiving_parser: If False, will raise ControlVariableExceptions 490 if any are encountered when parsing control 491 files. Note that this can raise an exception 492 for syntax errors in unrelated files, because 493 we parse them before applying the predicate. 494 @param run_prod_code: If true, the retrieved tests will run the test 495 code that lives in prod aka the test code 496 currently on the lab servers by disabling 497 SSP for the discovered tests. 498 @param test_args: A dict of args to be seeded in test control file under 499 the name |args_dict|. 500 """ 501 self._cf_getter = cf_getter 502 self._forgiving_parser = forgiving_parser 503 self._run_prod_code = run_prod_code 504 self._test_args = test_args 505 506 507 def retrieve(self, test_name): 508 """Retrieve a test's control data. 509 510 This ignores forgiving_parser because we cannot return a 511 forgiving value. 512 513 @param test_name: Name of test to retrieve. 514 515 @raises ControlVariableException: There is a syntax error in a 516 control file. 517 518 @returns a ControlData object 519 """ 520 path = self._cf_getter.get_control_file_path(test_name) 521 text = self._cf_getter.get_control_file_contents(path) 522 return self._parse_cf_text(path, text) 523 524 525 def retrieve_for_suite(self, suite_name=''): 526 """Scan through all tests and find all tests. 527 528 @param suite_name: If specified, this method will attempt to restrain 529 the search space to just this suite's control files. 
530 531 @raises ControlVariableException: If forgiving_parser is False and there 532 is a syntax error in a control file. 533 534 @returns a dictionary of ControlData objects that based on given 535 parameters. 536 """ 537 control_file_texts = self._get_cf_texts_for_suite(suite_name) 538 return self._parse_cf_text_many(control_file_texts) 539 540 541 def _filter_cf_paths(self, paths): 542 """Remove certain control file paths 543 544 @param paths: Iterable of paths 545 @returns: generator yielding paths 546 """ 547 matcher = re.compile(r'[^/]+/(deps|profilers)/.+') 548 return (path for path in paths if not matcher.match(path)) 549 550 551 def _get_cf_texts_for_suite(self, suite_name): 552 """Get control file content for given suite. 553 554 @param suite_name: If specified, this method will attempt to restrain 555 the search space to just this suite's control files. 556 @returns: generator yielding (path, text) tuples 557 """ 558 files = self._cf_getter.get_control_file_list(suite_name=suite_name) 559 filtered_files = self._filter_cf_paths(files) 560 for path in filtered_files: 561 yield path, self._cf_getter.get_control_file_contents(path) 562 563 564 def _parse_cf_text_many(self, control_file_texts): 565 """Parse control file texts. 566 567 @param control_file_texts: iterable of (path, text) pairs 568 @returns: a dictionary of ControlData objects 569 """ 570 tests = {} 571 for path, text in control_file_texts: 572 # Seed test_args into the control file. 
573 if self._test_args: 574 text = tools.inject_vars(self._test_args, text) 575 try: 576 found_test = self._parse_cf_text(path, text) 577 except control_data.ControlVariableException, e: 578 if not self._forgiving_parser: 579 msg = "Failed parsing %s\n%s" % (path, e) 580 raise control_data.ControlVariableException(msg) 581 logging.warning("Skipping %s\n%s", path, e) 582 except Exception, e: 583 logging.error("Bad %s\n%s", path, e) 584 else: 585 tests[path] = found_test 586 return tests 587 588 589 def _parse_cf_text(self, path, text): 590 """Parse control file text. 591 592 This ignores forgiving_parser because we cannot return a 593 forgiving value. 594 595 @param path: path to control file 596 @param text: control file text contents 597 @returns: a ControlData object 598 599 @raises ControlVariableException: There is a syntax error in a 600 control file. 601 """ 602 test = control_data.parse_control_string( 603 text, raise_warnings=True, path=path) 604 test.text = text 605 if self._run_prod_code: 606 test.require_ssp = False 607 return test 608 609 610class _BatchControlFileRetriever(_ControlFileRetriever): 611 """Subclass that can retrieve suite control files in batch.""" 612 613 614 def _get_cf_texts_for_suite(self, suite_name): 615 """Get control file content for given suite. 616 617 @param suite_name: If specified, this method will attempt to restrain 618 the search space to just this suite's control files. 619 @returns: generator yielding (path, text) tuples 620 """ 621 suite_info = self._cf_getter.get_suite_info(suite_name=suite_name) 622 files = suite_info.keys() 623 filtered_files = self._filter_cf_paths(files) 624 for path in filtered_files: 625 yield path, suite_info[path] 626 627 628def get_test_source_build(builds, **dargs): 629 """Get the build of test code. 630 631 Get the test source build from arguments. If parameter 632 `test_source_build` is set and has a value, return its value. Otherwise 633 returns the ChromeOS build name if it exists. 
If ChromeOS build is not 634 specified either, raise SuiteArgumentException. 635 636 @param builds: the builds on which we're running this suite. It's a 637 dictionary of version_prefix:build. 638 @param **dargs: Any other Suite constructor parameters, as described 639 in Suite.__init__ docstring. 640 641 @return: The build contains the test code. 642 @raise: SuiteArgumentException if both test_source_build and ChromeOS 643 build are not specified. 644 645 """ 646 if dargs.get('test_source_build', None): 647 return dargs['test_source_build'] 648 cros_build = builds.get(provision.CROS_VERSION_PREFIX, None) 649 if cros_build.endswith(provision.CHEETS_SUFFIX): 650 test_source_build = re.sub( 651 provision.CHEETS_SUFFIX + '$', '', cros_build) 652 else: 653 test_source_build = cros_build 654 if not test_source_build: 655 raise error.SuiteArgumentException( 656 'test_source_build must be specified if CrOS build is not ' 657 'specified.') 658 return test_source_build 659 660 661def list_all_suites(build, devserver, cf_getter=None): 662 """ 663 Parses all ControlData objects with a SUITE tag and extracts all 664 defined suite names. 665 666 @param build: the build on which we're running this suite. 667 @param devserver: the devserver which contains the build. 668 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 669 using DevServerGetter. 670 671 @return list of suites 672 """ 673 if cf_getter is None: 674 cf_getter = _create_ds_getter(build, devserver) 675 676 suites = set() 677 predicate = lambda t: True 678 for test in find_and_parse_tests(cf_getter, predicate): 679 suites.update(test.suite_tag_parts) 680 return list(suites) 681 682 683def test_file_similarity_predicate(test_file_pattern): 684 """Returns predicate that gets the similarity based on a test's file 685 name pattern. 
686 687 Builds a predicate that takes in a parsed control file (a ControlData) 688 and returns a tuple of (file path, ratio), where ratio is the 689 similarity between the test file name and the given test_file_pattern. 690 691 @param test_file_pattern: regular expression (string) to match against 692 control file names. 693 @return a callable that takes a ControlData and and returns a tuple of 694 (file path, ratio), where ratio is the similarity between the 695 test file name and the given test_file_pattern. 696 """ 697 return lambda t: ((None, 0) if not hasattr(t, 'path') else 698 (t.path, difflib.SequenceMatcher(a=t.path, 699 b=test_file_pattern).ratio())) 700 701 702def test_name_similarity_predicate(test_name): 703 """Returns predicate that matched based on a test's name. 704 705 Builds a predicate that takes in a parsed control file (a ControlData) 706 and returns a tuple of (test name, ratio), where ratio is the similarity 707 between the test name and the given test_name. 708 709 @param test_name: the test name to base the predicate on. 710 @return a callable that takes a ControlData and returns a tuple of 711 (test name, ratio), where ratio is the similarity between the 712 test name and the given test_name. 713 """ 714 return lambda t: ((None, 0) if not hasattr(t, 'name') else 715 (t.name, 716 difflib.SequenceMatcher(a=t.name, b=test_name).ratio())) 717 718 719def matches_attribute_expression_predicate(test_attr_boolstr): 720 """Returns predicate that matches based on boolean expression of 721 attributes. 722 723 Builds a predicate that takes in a parsed control file (a ControlData) 724 ans returns True if the test attributes satisfy the given attribute 725 boolean expression. 726 727 @param test_attr_boolstr: boolean expression of the attributes to be 728 test, like 'system:all and interval:daily'. 729 730 @return a callable that takes a ControlData and returns True if the test 731 attributes satisfy the given boolean expression. 
732 """ 733 return lambda t: boolparse_lib.BoolstrResult( 734 test_attr_boolstr, t.attributes) 735 736 737def test_file_matches_pattern_predicate(test_file_pattern): 738 """Returns predicate that matches based on a test's file name pattern. 739 740 Builds a predicate that takes in a parsed control file (a ControlData) 741 and returns True if the test's control file name matches the given 742 regular expression. 743 744 @param test_file_pattern: regular expression (string) to match against 745 control file names. 746 @return a callable that takes a ControlData and and returns 747 True if control file name matches the pattern. 748 """ 749 return lambda t: hasattr(t, 'path') and re.match(test_file_pattern, 750 t.path) 751 752 753def test_name_matches_pattern_predicate(test_name_pattern): 754 """Returns predicate that matches based on a test's name pattern. 755 756 Builds a predicate that takes in a parsed control file (a ControlData) 757 and returns True if the test name matches the given regular expression. 758 759 @param test_name_pattern: regular expression (string) to match against 760 test names. 761 @return a callable that takes a ControlData and returns 762 True if the name fields matches the pattern. 763 """ 764 return lambda t: hasattr(t, 'name') and re.match(test_name_pattern, 765 t.name) 766 767 768def test_name_equals_predicate(test_name): 769 """Returns predicate that matched based on a test's name. 770 771 Builds a predicate that takes in a parsed control file (a ControlData) 772 and returns True if the test name is equal to |test_name|. 773 774 @param test_name: the test name to base the predicate on. 775 @return a callable that takes a ControlData and looks for |test_name| 776 in that ControlData's name. 
777 """ 778 return lambda t: hasattr(t, 'name') and test_name == t.name 779 780 781def name_in_tag_similarity_predicate(name): 782 """Returns predicate that takes a control file and gets the similarity 783 of the suites in the control file and the given name. 784 785 Builds a predicate that takes in a parsed control file (a ControlData) 786 and returns a list of tuples of (suite name, ratio), where suite name 787 is each suite listed in the control file, and ratio is the similarity 788 between each suite and the given name. 789 790 @param name: the suite name to base the predicate on. 791 @return a callable that takes a ControlData and returns a list of tuples 792 of (suite name, ratio), where suite name is each suite listed in 793 the control file, and ratio is the similarity between each suite 794 and the given name. 795 """ 796 return lambda t: [(suite, 797 difflib.SequenceMatcher(a=suite, b=name).ratio()) 798 for suite in t.suite_tag_parts] or [(None, 0)] 799 800 801def name_in_tag_predicate(name): 802 """Returns predicate that takes a control file and looks for |name|. 803 804 Builds a predicate that takes in a parsed control file (a ControlData) 805 and returns True if the SUITE tag is present and contains |name|. 806 807 @param name: the suite name to base the predicate on. 808 @return a callable that takes a ControlData and looks for |name| in that 809 ControlData object's suite member. 810 """ 811 return lambda t: name in t.suite_tag_parts 812 813 814def create_fs_getter(autotest_dir): 815 """ 816 @param autotest_dir: the place to find autotests. 817 @return a FileSystemGetter instance that looks under |autotest_dir|. 818 """ 819 # currently hard-coded places to look for tests. 
def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False, test_args=None):
    """
    Collect the parsed control files that satisfy |predicate|.

    Every control file reachable through |cf_getter| for |suite_name| is
    parsed first; the predicate is applied afterwards.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
            and fetch the content of control files
    @param predicate: a function that should return True when run over a
            ControlData representation of a control file that should be in
            this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
                             When False, experimental tests are filtered out
                             in addition to applying |predicate|.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.
    @param test_args: A dict of args to be seeded in test control file.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute. Results are sorted based
            on the TIME setting in control file, slowest test comes first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    parsed = _get_cf_retriever(
            cf_getter,
            forgiving_parser=forgiving_parser,
            run_prod_code=run_prod_code,
            test_args=test_args).retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(parsed))

    accept = predicate
    if not add_experimental:
        # Experimental tests are excluded on top of the caller's predicate.
        accept = _ComposedPredicate([predicate,
                                     _non_experimental_tests_predicate])

    def _time_index(test):
        """Sort key derived from the TIME setting of |test|."""
        return control_data.ControlData.get_test_time_index(test.time)

    matching = []
    for test in parsed.itervalues():
        if accept(test):
            matching.append(test)
    return sorted(matching, key=_time_index, reverse=True)
def _deprecated_suite_method(func):
    """Decorator for deprecated Suite static methods.

    Wraps |func| so that every call first emits a warning saying that
    calling the method from Suite is deprecated, then delegates to |func|
    unchanged.

    TODO(ayatane): This is used to decorate functions that are called as
    static methods on Suite.

    @param func: the module-level function to expose on Suite.
    @return a staticmethod object wrapping |func|.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        """Wraps |func| for warning."""
        # stacklevel=2 attributes the warning to the code that called the
        # deprecated method rather than to this wrapper, so the reported
        # file/line identifies the call site that needs to be migrated.
        warnings.warn('Calling method "%s" from Suite is deprecated' %
                      func.__name__, stacklevel=2)
        return func(*args, **kwargs)
    return staticmethod(wrapper)
958 @var _tag: a string with which to tag jobs run in this suite. 959 @var _builds: the builds on which we're running this suite. 960 @var _afe: an instance of AFE as defined in server/frontend.py. 961 @var _tko: an instance of TKO as defined in server/frontend.py. 962 @var _jobs: currently scheduled jobs, if any. 963 @var _jobs_to_tests: a dictionary that maps job ids to tests represented 964 ControlData objects. 965 @var _retry: a bool value indicating whether jobs should be retried on 966 failure. 967 @var _retry_handler: a RetryHandler object. 968 969 """ 970 971 972 def __init__( 973 self, 974 tests, 975 tag, 976 builds, 977 board, 978 afe=None, 979 tko=None, 980 pool=None, 981 results_dir=None, 982 max_runtime_mins=24*60, 983 timeout_mins=24*60, 984 file_bugs=False, 985 suite_job_id=None, 986 ignore_deps=False, 987 extra_deps=None, 988 priority=priorities.Priority.DEFAULT, 989 wait_for_results=True, 990 job_retry=False, 991 max_retries=sys.maxint, 992 offload_failures_only=False, 993 test_source_build=None, 994 job_keyvals=None, 995 child_dependencies=(), 996 result_reporter=None, 997 ): 998 """Initialize instance. 999 1000 @param tests: Iterable of tests to run. 1001 @param tag: a string with which to tag jobs run in this suite. 1002 @param builds: the builds on which we're running this suite. 1003 @param board: the board on which we're running this suite. 1004 @param afe: an instance of AFE as defined in server/frontend.py. 1005 @param tko: an instance of TKO as defined in server/frontend.py. 1006 @param pool: Specify the pool of machines to use for scheduling 1007 purposes. 1008 @param results_dir: The directory where the job can write results to. 1009 This must be set if you want job_id of sub-jobs 1010 list in the job keyvals. 1011 @param max_runtime_mins: Maximum suite runtime, in minutes. 1012 @param timeout: Maximum job lifetime, in hours. 1013 @param suite_job_id: Job id that will act as parent id to all sub jobs. 
1014 Default: None 1015 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 1016 attribute and skip applying of dependency labels. 1017 (Default:False) 1018 @param extra_deps: A list of strings which are the extra DEPENDENCIES 1019 to add to each test being scheduled. 1020 @param priority: Integer priority level. Higher is more important. 1021 @param wait_for_results: Set to False to run the suite job without 1022 waiting for test jobs to finish. Default is 1023 True. 1024 @param job_retry: A bool value indicating whether jobs should be retired 1025 on failure. If True, the field 'JOB_RETRIES' in 1026 control files will be respected. If False, do not 1027 retry. 1028 @param max_retries: Maximum retry limit at suite level. 1029 Regardless how many times each individual test 1030 has been retried, the total number of retries 1031 happening in the suite can't exceed _max_retries. 1032 Default to sys.maxint. 1033 @param offload_failures_only: Only enable gs_offloading for failed 1034 jobs. 1035 @param test_source_build: Build that contains the server-side test code. 1036 @param job_keyvals: General job keyvals to be inserted into keyval file, 1037 which will be used by tko/parse later. 1038 @param child_dependencies: (optional) list of dependency strings 1039 to be added as dependencies to child jobs. 1040 @param result_reporter: A _ResultReporter instance to report results. If 1041 None, an _EmailReporter will be created. 
1042 """ 1043 1044 self.tests = list(tests) 1045 self._tag = tag 1046 self._builds = builds 1047 self._results_dir = results_dir 1048 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30, 1049 delay_sec=10, 1050 debug=False) 1051 self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30, 1052 delay_sec=10, 1053 debug=False) 1054 self._jobs = [] 1055 self._jobs_to_tests = {} 1056 1057 self._file_bugs = file_bugs 1058 self._suite_job_id = suite_job_id 1059 self._job_retry=job_retry 1060 self._max_retries = max_retries 1061 # RetryHandler to be initialized in schedule() 1062 self._retry_handler = None 1063 self.wait_for_results = wait_for_results 1064 self._job_keyvals = job_keyvals 1065 if result_reporter is None: 1066 self._result_reporter = _EmailReporter(self) 1067 else: 1068 self._result_reporter = result_reporter 1069 1070 if extra_deps is None: 1071 extra_deps = [] 1072 extra_deps.append(board) 1073 if pool: 1074 extra_deps.append(pool) 1075 extra_deps.extend(child_dependencies) 1076 self._dependencies = tuple(extra_deps) 1077 1078 self._job_creator = _SuiteChildJobCreator( 1079 tag=tag, 1080 builds=builds, 1081 board=board, 1082 afe=afe, 1083 max_runtime_mins=max_runtime_mins, 1084 timeout_mins=timeout_mins, 1085 suite_job_id=suite_job_id, 1086 ignore_deps=ignore_deps, 1087 extra_deps=extra_deps, 1088 priority=priority, 1089 offload_failures_only=offload_failures_only, 1090 test_source_build=test_source_build, 1091 job_keyvals=job_keyvals, 1092 ) 1093 1094 1095 def _schedule_test(self, record, test, retry_for=None): 1096 """Schedule a single test and return the job. 1097 1098 Schedule a single test by creating a job, and then update relevant 1099 data structures that are used to keep track of all running jobs. 1100 1101 Emits a TEST_NA status log entry if it failed to schedule the test due 1102 to NoEligibleHostException or a non-existent board label. 1103 1104 Returns a frontend.Job object if the test is successfully scheduled. 
1105 If scheduling failed due to NoEligibleHostException or a non-existent 1106 board label, returns None. 1107 1108 @param record: A callable to use for logging. 1109 prototype: record(base_job.status_log_entry) 1110 @param test: ControlData for a test to run. 1111 @param retry_for: If we are scheduling a test to retry an 1112 old job, the afe_job_id of the old job 1113 will be passed in as |retry_for|. 1114 1115 @returns: A frontend.Job object or None 1116 """ 1117 msg = 'Scheduling %s' % test.name 1118 if retry_for: 1119 msg = msg + ', to retry afe job %d' % retry_for 1120 logging.debug(msg) 1121 begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT) 1122 try: 1123 job = self._job_creator.create_job(test, retry_for=retry_for) 1124 except (error.NoEligibleHostException, proxy.ValidationError) as e: 1125 if (isinstance(e, error.NoEligibleHostException) 1126 or (isinstance(e, proxy.ValidationError) 1127 and _is_nonexistent_board_error(e))): 1128 # Treat a dependency on a non-existent board label the same as 1129 # a dependency on a board that exists, but for which there's no 1130 # hardware. 1131 logging.debug('%s not applicable for this board/pool. ' 1132 'Emitting TEST_NA.', test.name) 1133 Status('TEST_NA', test.name, 1134 'Skipping: test not supported on this board/pool.', 1135 begin_time_str=begin_time_str).record_all(record) 1136 return None 1137 else: 1138 raise e 1139 except (error.RPCException, proxy.JSONRPCException): 1140 if retry_for: 1141 # Mark that we've attempted to retry the old job. 1142 self._retry_handler.set_attempted(job_id=retry_for) 1143 raise 1144 else: 1145 self._jobs.append(job) 1146 self._jobs_to_tests[job.id] = test 1147 if retry_for: 1148 # A retry job was just created, record it. 1149 self._retry_handler.add_retry( 1150 old_job_id=retry_for, new_job_id=job.id) 1151 retry_count = (test.job_retries - 1152 self._retry_handler.get_retry_max(job.id)) 1153 logging.debug('Job %d created to retry job %d. 
' 1154 'Have retried for %d time(s)', 1155 job.id, retry_for, retry_count) 1156 self._remember_job_keyval(job) 1157 return job 1158 1159 1160 def schedule(self, record): 1161 """ 1162 Schedule jobs using |self._afe|. 1163 1164 frontend.Job objects representing each scheduled job will be put in 1165 |self._jobs|. 1166 1167 @param record: A callable to use for logging. 1168 prototype: record(base_job.status_log_entry) 1169 @returns: The number of tests that were scheduled. 1170 """ 1171 scheduled_test_names = [] 1172 logging.debug('Discovered %d tests.', len(self.tests)) 1173 1174 Status('INFO', 'Start %s' % self._tag).record_result(record) 1175 try: 1176 # Write job_keyvals into keyval file. 1177 if self._job_keyvals: 1178 utils.write_keyval(self._results_dir, self._job_keyvals) 1179 1180 # TODO(crbug.com/730885): This is a hack to protect tests that are 1181 # not usually retried from getting hit by a provision error when run 1182 # as part of a suite. Remove this hack once provision is separated 1183 # out in its own suite. 1184 self._bump_up_test_retries(self.tests) 1185 for test in self.tests: 1186 scheduled_job = self._schedule_test(record, test) 1187 if scheduled_job is not None: 1188 scheduled_test_names.append(test.name) 1189 1190 # Write the num of scheduled tests and name of them to keyval file. 
1191 logging.debug('Scheduled %d tests, writing the total to keyval.', 1192 len(scheduled_test_names)) 1193 utils.write_keyval( 1194 self._results_dir, 1195 self._make_scheduled_tests_keyvals(scheduled_test_names)) 1196 except Exception: 1197 logging.exception('Exception while scheduling suite') 1198 Status('FAIL', self._tag, 1199 'Exception while scheduling suite').record_result(record) 1200 1201 if self._job_retry: 1202 self._retry_handler = RetryHandler( 1203 initial_jobs_to_tests=self._jobs_to_tests, 1204 max_retries=self._max_retries) 1205 return len(scheduled_test_names) 1206 1207 1208 def _bump_up_test_retries(self, tests): 1209 """Bump up individual test retries to match suite retry options.""" 1210 if not self._job_retry: 1211 return 1212 1213 for test in tests: 1214 # We do honor if a test insists on JOB_RETRIES = 0. 1215 if test.job_retries is None: 1216 logging.debug( 1217 'Test %s did not request retries, but suite requires ' 1218 'retries. Bumping retries up to 1. ' 1219 '(See crbug.com/730885)', 1220 test.name) 1221 test.job_retries = 1 1222 1223 1224 def _make_scheduled_tests_keyvals(self, scheduled_test_names): 1225 """Make a keyvals dict to write for scheduled test names. 1226 1227 @param scheduled_test_names: A list of scheduled test name strings. 1228 1229 @returns: A keyvals dict. 1230 """ 1231 return { 1232 constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names), 1233 constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names), 1234 } 1235 1236 1237 def _should_report(self, result): 1238 """ 1239 Returns True if this failure requires to be reported. 1240 1241 @param result: A result, encapsulating the status of the failed job. 1242 @return: True if we should report this failure. 
1243 """ 1244 return (self._file_bugs and result.test_executed and 1245 not result.is_testna() and 1246 result.is_worse_than(job_status.Status('GOOD', '', 'reason'))) 1247 1248 1249 def _has_retry(self, result): 1250 """ 1251 Return True if this result gets to retry. 1252 1253 @param result: A result, encapsulating the status of the failed job. 1254 @return: bool 1255 """ 1256 return (self._job_retry 1257 and self._retry_handler.has_following_retry(result)) 1258 1259 1260 def wait(self, record): 1261 """ 1262 Polls for the job statuses, using |record| to print status when each 1263 completes. 1264 1265 @param record: callable that records job status. 1266 prototype: 1267 record(base_job.status_log_entry) 1268 """ 1269 waiter = job_status.JobResultWaiter(self._afe, self._tko) 1270 try: 1271 if self._suite_job_id: 1272 jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id) 1273 else: 1274 logging.warning('Unknown suite_job_id, falling back to less ' 1275 'efficient results_generator.') 1276 jobs = self._jobs 1277 waiter.add_jobs(jobs) 1278 for result in waiter.wait_for_results(): 1279 self._handle_result(result=result, record=record, waiter=waiter) 1280 if self._finished_waiting(): 1281 break 1282 except Exception: # pylint: disable=W0703 1283 logging.exception('Exception waiting for results') 1284 Status('FAIL', self._tag, 1285 'Exception waiting for results').record_result(record) 1286 1287 1288 def _finished_waiting(self): 1289 """Return whether the suite is finished waiting for child jobs.""" 1290 return False 1291 1292 1293 def _handle_result(self, result, record, waiter): 1294 """ 1295 Handle a test job result. 1296 1297 @param result: Status instance for job. 1298 @param record: callable that records job status. 1299 prototype: 1300 record(base_job.status_log_entry) 1301 @param waiter: JobResultsWaiter instance. 1302 @param reporter: _ResultReporter instance. 
1303 """ 1304 self._record_result(result, record) 1305 rescheduled = False 1306 if self._job_retry and self._retry_handler._should_retry(result): 1307 rescheduled = self._retry_result(result, record, waiter) 1308 # TODO (crbug.com/751428): If the suite times out before a retry could 1309 # finish, we would lose the chance to report errors from the original 1310 # job. 1311 if self._has_retry(result) and rescheduled: 1312 return 1313 1314 if self._should_report(result): 1315 self._result_reporter.report(result) 1316 1317 1318 def _record_result(self, result, record): 1319 """ 1320 Record a test job result. 1321 1322 @param result: Status instance for job. 1323 @param record: callable that records job status. 1324 prototype: 1325 record(base_job.status_log_entry) 1326 """ 1327 result.record_all(record) 1328 self._remember_job_keyval(result) 1329 1330 1331 def _retry_result(self, result, record, waiter): 1332 """ 1333 Retry a test job result. 1334 1335 @param result: Status instance for job. 1336 @param record: callable that records job status. 1337 prototype: 1338 record(base_job.status_log_entry) 1339 @param waiter: JobResultsWaiter instance. 1340 @returns: True if a job was scheduled for retry, False otherwise. 1341 """ 1342 test = self._jobs_to_tests[result.id] 1343 try: 1344 # It only takes effect for CQ retriable job: 1345 # 1) in first try, test.fast=True. 1346 # 2) in second try, test will be run in normal mode, so reset 1347 # test.fast=False. 1348 test.fast = False 1349 new_job = self._schedule_test( 1350 record=record, test=test, retry_for=result.id) 1351 except (error.RPCException, proxy.JSONRPCException) as e: 1352 logging.error('Failed to schedule test: %s, Reason: %s', 1353 test.name, e) 1354 return False 1355 else: 1356 waiter.add_job(new_job) 1357 return bool(new_job) 1358 1359 1360 @property 1361 def _should_file_bugs(self): 1362 """Return whether bugs should be filed. 
1363 1364 @returns: bool 1365 """ 1366 # File bug when failure is one of the _FILE_BUG_SUITES, 1367 # otherwise send an email to the owner anc cc. 1368 return self._tag in _FILE_BUG_SUITES 1369 1370 1371 def abort(self): 1372 """ 1373 Abort all scheduled test jobs. 1374 """ 1375 if self._jobs: 1376 job_ids = [job.id for job in self._jobs] 1377 self._afe.run('abort_host_queue_entries', job__id__in=job_ids) 1378 1379 1380 def _remember_job_keyval(self, job): 1381 """ 1382 Record provided job as a suite job keyval, for later referencing. 1383 1384 @param job: some representation of a job that has the attributes: 1385 id, test_name, and owner 1386 """ 1387 if self._results_dir and job.id and job.owner and job.test_name: 1388 job_id_owner = '%s-%s' % (job.id, job.owner) 1389 logging.debug('Adding job keyval for %s=%s', 1390 job.test_name, job_id_owner) 1391 utils.write_keyval( 1392 self._results_dir, 1393 {hashlib.md5(job.test_name).hexdigest(): job_id_owner}) 1394 1395 1396class Suite(_BaseSuite): 1397 """ 1398 A suite of tests, defined by some predicate over control file variables. 1399 1400 Given a place to search for control files a predicate to match the desired 1401 tests, can gather tests and fire off jobs to run them, and then wait for 1402 results. 1403 1404 @var _predicate: a function that should return True when run over a 1405 ControlData representation of a control file that should be in 1406 this Suite. 1407 @var _tag: a string with which to tag jobs run in this suite. 1408 @var _builds: the builds on which we're running this suite. 1409 @var _afe: an instance of AFE as defined in server/frontend.py. 1410 @var _tko: an instance of TKO as defined in server/frontend.py. 1411 @var _jobs: currently scheduled jobs, if any. 1412 @var _jobs_to_tests: a dictionary that maps job ids to tests represented 1413 ControlData objects. 
1414 @var _cf_getter: a control_file_getter.ControlFileGetter 1415 @var _retry: a bool value indicating whether jobs should be retried on 1416 failure. 1417 @var _retry_handler: a RetryHandler object. 1418 1419 """ 1420 1421 # TODO(ayatane): These methods are kept on the Suite class for 1422 # backward compatibility. 1423 find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests) 1424 find_possible_tests = _deprecated_suite_method(find_possible_tests) 1425 create_fs_getter = _deprecated_suite_method(create_fs_getter) 1426 name_in_tag_predicate = _deprecated_suite_method(name_in_tag_predicate) 1427 name_in_tag_similarity_predicate = _deprecated_suite_method( 1428 name_in_tag_similarity_predicate) 1429 test_name_equals_predicate = _deprecated_suite_method( 1430 test_name_equals_predicate) 1431 test_name_matches_pattern_predicate = _deprecated_suite_method( 1432 test_name_matches_pattern_predicate) 1433 test_file_matches_pattern_predicate = _deprecated_suite_method( 1434 test_file_matches_pattern_predicate) 1435 matches_attribute_expression_predicate = _deprecated_suite_method( 1436 matches_attribute_expression_predicate) 1437 test_name_similarity_predicate = _deprecated_suite_method( 1438 test_name_similarity_predicate) 1439 test_file_similarity_predicate = _deprecated_suite_method( 1440 test_file_similarity_predicate) 1441 list_all_suites = _deprecated_suite_method(list_all_suites) 1442 get_test_source_build = _deprecated_suite_method(get_test_source_build) 1443 1444 1445 @classmethod 1446 def create_from_predicates(cls, predicates, builds, board, devserver, 1447 cf_getter=None, name='ad_hoc_suite', 1448 run_prod_code=False, **dargs): 1449 """ 1450 Create a Suite using a given predicate test filters. 1451 1452 Uses supplied predicate(s) to instantiate a Suite. Looks for tests in 1453 |autotest_dir| and will schedule them using |afe|. Pulls control files 1454 from the default dev server. Results will be pulled from |tko| upon 1455 completion. 
1456 1457 @param predicates: A list of callables that accept ControlData 1458 representations of control files. A test will be 1459 included in suite if all callables in this list 1460 return True on the given control file. 1461 @param builds: the builds on which we're running this suite. It's a 1462 dictionary of version_prefix:build. 1463 @param board: the board on which we're running this suite. 1464 @param devserver: the devserver which contains the build. 1465 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 1466 using DevServerGetter. 1467 @param name: name of suite. Defaults to 'ad_hoc_suite' 1468 @param run_prod_code: If true, the suite will run the tests that 1469 lives in prod aka the test code currently on the 1470 lab servers. 1471 @param **dargs: Any other Suite constructor parameters, as described 1472 in Suite.__init__ docstring. 1473 @return a Suite instance. 1474 """ 1475 if cf_getter is None: 1476 if run_prod_code: 1477 cf_getter = create_fs_getter(_AUTOTEST_DIR) 1478 else: 1479 build = get_test_source_build(builds, **dargs) 1480 cf_getter = _create_ds_getter(build, devserver) 1481 1482 return cls(predicates, 1483 name, builds, board, cf_getter, run_prod_code, **dargs) 1484 1485 1486 @classmethod 1487 def create_from_name(cls, name, builds, board, devserver, cf_getter=None, 1488 **dargs): 1489 """ 1490 Create a Suite using a predicate based on the SUITE control file var. 1491 1492 Makes a predicate based on |name| and uses it to instantiate a Suite 1493 that looks for tests in |autotest_dir| and will schedule them using 1494 |afe|. Pulls control files from the default dev server. 1495 Results will be pulled from |tko| upon completion. 1496 1497 @param name: a value of the SUITE control file variable to search for. 1498 @param builds: the builds on which we're running this suite. It's a 1499 dictionary of version_prefix:build. 1500 @param board: the board on which we're running this suite. 
    def __init__(
            self,
            predicates,
            tag,
            builds,
            board,
            cf_getter,
            run_prod_code=False,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            forgiving_parser=True,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxint,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            test_args=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """
        Constructor

        Finds the tests matching all of |predicates| through |cf_getter| and
        hands them, together with the remaining options, to the _BaseSuite
        constructor.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param run_prod_code: If true, the suite will run the test code that
                              lives in prod aka the test code currently on the
                              lab servers.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: True if failed results should be handed to the
                          result reporter.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param forgiving_parser: If False, control file parsing raises
                                 ControlVariableExceptions instead of
                                 tolerating them.
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be
                          retried on failure. If True, the field
                          'JOB_RETRIES' in control files will be respected.
                          If False, do not retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxint.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """
        # The suite tag doubles as the suite name used to narrow down which
        # control files are searched.
        tests = find_and_parse_tests(
                cf_getter,
                _ComposedPredicate(predicates),
                tag,
                forgiving_parser=forgiving_parser,
                run_prod_code=run_prod_code,
                test_args=test_args,
        )
        super(Suite, self).__init__(
                tests=tests,
                tag=tag,
                builds=builds,
                board=board,
                afe=afe,
                tko=tko,
                pool=pool,
                results_dir=results_dir,
                max_runtime_mins=max_runtime_mins,
                timeout_mins=timeout_mins,
                file_bugs=file_bugs,
                suite_job_id=suite_job_id,
                ignore_deps=ignore_deps,
                extra_deps=extra_deps,
                priority=priority,
                wait_for_results=wait_for_results,
                job_retry=job_retry,
                max_retries=max_retries,
                offload_failures_only=offload_failures_only,
                test_source_build=test_source_build,
                job_keyvals=job_keyvals,
                child_dependencies=child_dependencies,
                result_reporter=result_reporter,
        )
1668 @param cf_getter: a control_file_getter.ControlFileGetter. 1669 @param test_args: A dict of args passed all the way to each individual 1670 test that will be actually ran. 1671 @param test_source_build: Build that contains the server-side test code. 1672 @param kwargs: Various keyword arguments passed to 1673 _BaseSuite constructor. 1674 """ 1675 super(ProvisionSuite, self).__init__( 1676 tests=[], 1677 tag=tag, 1678 builds=builds, 1679 board=board, 1680 **kwargs) 1681 self._num_successful = 0 1682 self._num_required = 0 1683 self.tests = [] 1684 1685 static_deps = [dep for dep in self._dependencies 1686 if not provision.Provision.acts_on(dep)] 1687 if 'pool:suites' in static_deps: 1688 logging.info('Provision suite is disabled on suites pool') 1689 return 1690 logging.debug('Looking for hosts matching %r', static_deps) 1691 hosts = self._afe.get_hosts( 1692 invalid=False, multiple_labels=static_deps) 1693 logging.debug('Found %d matching hosts for ProvisionSuite', len(hosts)) 1694 available_hosts = [h for h in hosts if h.is_available()] 1695 logging.debug('Found %d available hosts for ProvisionSuite', 1696 len(available_hosts)) 1697 dummy_test = _load_dummy_test( 1698 builds, devserver, cf_getter, 1699 run_prod_code, test_args, test_source_build) 1700 self.tests = [dummy_test] * min(len(available_hosts), num_max) 1701 logging.debug('Made %d tests for ProvisionSuite', len(self.tests)) 1702 self._num_required = min(num_required, len(self.tests)) 1703 logging.debug('Expecting %d tests to pass for ProvisionSuite', 1704 self._num_required) 1705 1706 def _handle_result(self, result, record, waiter): 1707 super(ProvisionSuite, self)._handle_result(result, record, waiter) 1708 if result.is_good(): 1709 self._num_successful += 1 1710 1711 def _finished_waiting(self): 1712 return self._num_successful >= self._num_required 1713 1714 1715def _load_dummy_test( 1716 builds, 1717 devserver, 1718 cf_getter=None, 1719 run_prod_code=False, 1720 test_args=None, 1721 
class _ComposedPredicate(object):
    """Callable that is the conjunction of several test predicates.

    Each predicate takes a test control data object and returns a true value
    when that test is to be included. An instance accepts a test only when
    every one of its predicates accepts it, i.e. the accepted set is the
    intersection of the individual predicates' sets.
    """

    def __init__(self, predicates):
        """Initialize instance.

        @param predicates: Iterable of predicates.
        """
        self._predicates = list(predicates)

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self._predicates)

    def __call__(self, control_data_):
        # Stop at the first rejecting predicate; later ones are not called.
        for check in self._predicates:
            if not check(control_data_):
                return False
        return True
1779 3) e.problem_keys['meta_hosts'] matches this pattern: 1780 "Label "board:.*" not found" 1781 1782 We check for conditions 1) and 2) on the 1783 theory that they're relatively immutable. 1784 We don't check condition 3) because it seems 1785 likely to be a maintenance burden, and for the 1786 times when we're wrong, being right shouldn't 1787 matter enough (we _hope_). 1788 1789 @param e: proxy.ValidationError instance 1790 @returns: boolean 1791 """ 1792 return (isinstance(e.problem_keys, dict) 1793 and len(e.problem_keys) == 1 1794 and 'meta_hosts' in e.problem_keys) 1795 1796 1797class _ResultReporter(object): 1798 """Abstract base class for reporting test results. 1799 1800 Usually, this is used to report test failures. 1801 """ 1802 1803 __metaclass__ = abc.ABCMeta 1804 1805 @abc.abstractmethod 1806 def report(self, result): 1807 """Report test result. 1808 1809 @param result: Status instance for job. 1810 """ 1811 1812 1813class _EmailReporter(_ResultReporter): 1814 """Class that emails based on test failures.""" 1815 1816 # TODO(akeshet): Document what |bug_template| is actually supposed to come 1817 # from, and rename it to something unrelated to "bugs" which are no longer 1818 # relevant now that this is purely an email sender. 1819 def __init__(self, suite, bug_template=None): 1820 self._suite = suite 1821 self._bug_template = bug_template or {} 1822 1823 def _get_test_bug(self, result): 1824 """Get TestBug for the given result. 1825 1826 @param result: Status instance for a test job. 1827 @returns: TestBug instance. 1828 """ 1829 # reporting modules have dependency on external packages, e.g., httplib2 1830 # Such dependency can cause issue to any module tries to import suite.py 1831 # without building site-packages first. Since the reporting modules are 1832 # only used in this function, move the imports here avoid the 1833 # requirement of building site packages to use other functions in this 1834 # module. 
1835 from autotest_lib.server.cros.dynamic_suite import reporting 1836 1837 job_views = self._suite._tko.run('get_detailed_test_views', 1838 afe_job_id=result.id) 1839 return reporting.TestBug(self._suite._job_creator.cros_build, 1840 utils.get_chrome_version(job_views), 1841 self._suite._tag, 1842 result) 1843 1844 def _get_bug_template(self, result): 1845 """Get BugTemplate for test job. 1846 1847 @param result: Status instance for job. 1848 @param bug_template: A template dictionary specifying the default bug 1849 filing options for failures in this suite. 1850 @returns: BugTemplate instance 1851 """ 1852 # reporting modules have dependency on external packages, e.g., httplib2 1853 # Such dependency can cause issue to any module tries to import suite.py 1854 # without building site-packages first. Since the reporting modules are 1855 # only used in this function, move the imports here avoid the 1856 # requirement of building site packages to use other functions in this 1857 # module. 1858 from autotest_lib.server.cros.dynamic_suite import reporting_utils 1859 1860 # Try to merge with bug template in test control file. 1861 template = reporting_utils.BugTemplate(self._bug_template) 1862 try: 1863 test_data = self._suite._jobs_to_tests[result.id] 1864 return template.finalize_bug_template( 1865 test_data.bug_template) 1866 except AttributeError: 1867 # Test control file does not have bug template defined. 1868 return template.bug_template 1869 except reporting_utils.InvalidBugTemplateException as e: 1870 logging.error('Merging bug templates failed with ' 1871 'error: %s An empty bug template will ' 1872 'be used.', e) 1873 return {} 1874 1875 def report(self, result): 1876 # reporting modules have dependency on external 1877 # packages, e.g., httplib2 Such dependency can cause 1878 # issue to any module tries to import suite.py without 1879 # building site-packages first. 
Since the reporting 1880 # modules are only used in this function, move the 1881 # imports here avoid the requirement of building site 1882 # packages to use other functions in this module. 1883 from autotest_lib.server.cros.dynamic_suite import reporting 1884 1885 reporting.send_email( 1886 self._get_test_bug(result), 1887 self._get_bug_template(result)) 1888