1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import abc 6import datetime 7import difflib 8import functools 9import hashlib 10import logging 11import operator 12import os 13import re 14import sys 15import warnings 16 17import common 18 19from autotest_lib.frontend.afe.json_rpc import proxy 20from autotest_lib.client.common_lib import enum 21from autotest_lib.client.common_lib import error 22from autotest_lib.client.common_lib import global_config 23from autotest_lib.client.common_lib import priorities 24from autotest_lib.client.common_lib import time_utils 25from autotest_lib.client.common_lib import utils 26from autotest_lib.frontend.afe import model_attributes 27from autotest_lib.frontend.afe.json_rpc import proxy 28from autotest_lib.server.cros import provision 29from autotest_lib.server.cros.dynamic_suite import constants 30from autotest_lib.server.cros.dynamic_suite import control_file_getter 31from autotest_lib.server.cros.dynamic_suite import frontend_wrappers 32from autotest_lib.server.cros.dynamic_suite import job_status 33from autotest_lib.server.cros.dynamic_suite import suite_common 34from autotest_lib.server.cros.dynamic_suite import tools 35from autotest_lib.server.cros.dynamic_suite.job_status import Status 36 37try: 38 from autotest_lib.server.cros.dynamic_suite import boolparse_lib 39except ImportError as e: 40 print 'Unable to import boolparse_lib: %s' % (e,) 41 print 'This script must be either:' 42 print ' - Be run in the chroot.' 43 print ' - (not yet supported) be run after running ' 44 print ' ../utils/build_externals.py' 45 46_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta', 47 'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable', 48 'sanity', 'push_to_prod'] 49_AUTOTEST_DIR = global_config.global_config.get_config_value( 50 'SCHEDULER', 'drone_installation_directory') 51 52 53class RetryHandler(object): 54 """Maintain retry information. 55 56 @var _retry_map: A dictionary that stores retry history. 57 The key is afe job id. The value is a dictionary. 58 {job_id: {'state':RetryHandler.States, 'retry_max':int}} 59 - state: 60 The retry state of a job. 61 NOT_ATTEMPTED: 62 We haven't done anything about the job. 63 ATTEMPTED: 64 We've made an attempt to schedule a retry job. The 65 scheduling may or may not be successful, e.g. 66 it might encounter an rpc error. Note failure 67 in scheduling a retry is different from a retry job failure. 68 For each job, we only attempt to schedule a retry once. 69 For example, assume we have a test with JOB_RETRIES=5 and 70 its second retry job failed. When we attempt to create 71 a third retry job to retry the second, we hit an rpc 72 error. In such case, we will give up on all following 73 retries. 74 RETRIED: 75 A retry job has already been successfully 76 scheduled. 77 - retry_max: 78 The maximum of times the job can still 79 be retried, taking into account retries 80 that have occurred. 81 @var _retry_level: A retry might be triggered only if the result 82 is worse than the level. 83 @var _max_retries: Maximum retry limit at suite level. 84 Regardless how many times each individual test 85 has been retried, the total number of retries happening in 86 the suite can't exceed _max_retries. 87 """ 88 89 States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED', 90 start_value=1, step=1) 91 92 def __init__(self, initial_jobs_to_tests, retry_level='WARN', 93 max_retries=None): 94 """Initialize RetryHandler. 95 96 @param initial_jobs_to_tests: A dictionary that maps a job id to 97 a ControlData object. This dictionary should contain 98 jobs that are originally scheduled by the suite. 99 @param retry_level: A retry might be triggered only if the result is 100 worse than the level. 101 @param max_retries: Integer, maxmium total retries allowed 102 for the suite. Default to None, no max. 103 """ 104 self._retry_map = {} 105 self._retry_level = retry_level 106 self._max_retries = (max_retries 107 if max_retries is not None else sys.maxint) 108 for job_id, test in initial_jobs_to_tests.items(): 109 if test.job_retries > 0: 110 self._add_job(new_job_id=job_id, 111 retry_max=test.job_retries) 112 else: 113 logging.debug("Test %s has no retries", test.name) 114 115 116 def _add_job(self, new_job_id, retry_max): 117 """Add a newly-created job to the retry map. 118 119 @param new_job_id: The afe_job_id of a newly created job. 120 @param retry_max: The maximum of times that we could retry 121 the test if the job fails. 122 123 @raises ValueError if new_job_id is already in retry map. 124 125 """ 126 if new_job_id in self._retry_map: 127 raise ValueError('add_job called when job is already in retry map.') 128 129 self._retry_map[new_job_id] = { 130 'state': self.States.NOT_ATTEMPTED, 131 'retry_max': retry_max} 132 133 134 def _suite_max_reached(self): 135 """Return whether maximum retry limit for a suite has been reached.""" 136 return self._max_retries <= 0 137 138 139 def add_retry(self, old_job_id, new_job_id): 140 """Record a retry. 141 142 Update retry map with the retry information. 143 144 @param old_job_id: The afe_job_id of the job that is retried. 145 @param new_job_id: The afe_job_id of the retry job. 146 147 @raises KeyError if old_job_id isn't in the retry map. 148 @raises ValueError if we have already retried or made an attempt 149 to retry the old job. 150 151 """ 152 old_record = self._retry_map[old_job_id] 153 if old_record['state'] != self.States.NOT_ATTEMPTED: 154 raise ValueError( 155 'We have already retried or attempted to retry job %d' % 156 old_job_id) 157 old_record['state'] = self.States.RETRIED 158 self._add_job(new_job_id=new_job_id, 159 retry_max=old_record['retry_max'] - 1) 160 self._max_retries -= 1 161 162 163 def set_attempted(self, job_id): 164 """Set the state of the job to ATTEMPTED. 165 166 @param job_id: afe_job_id of a job. 167 168 @raises KeyError if job_id isn't in the retry map. 169 @raises ValueError if the current state is not NOT_ATTEMPTED. 170 171 """ 172 current_state = self._retry_map[job_id]['state'] 173 if current_state != self.States.NOT_ATTEMPTED: 174 # We are supposed to retry or attempt to retry each job 175 # only once. Raise an error if this is not the case. 176 raise ValueError('Unexpected state transition: %s -> %s' % 177 (self.States.get_string(current_state), 178 self.States.get_string(self.States.ATTEMPTED))) 179 else: 180 self._retry_map[job_id]['state'] = self.States.ATTEMPTED 181 182 183 def has_following_retry(self, result): 184 """Check whether there will be a following retry. 185 186 We have the following cases for a given job id (result.id), 187 - no retry map entry -> retry not required, no following retry 188 - has retry map entry: 189 - already retried -> has following retry 190 - has not retried 191 (this branch can be handled by checking should_retry(result)) 192 - retry_max == 0 --> the last retry job, no more retry 193 - retry_max > 0 194 - attempted, but has failed in scheduling a 195 following retry due to rpc error --> no more retry 196 - has not attempped --> has following retry if test failed. 197 198 @param result: A result, encapsulating the status of the job. 199 200 @returns: True, if there will be a following retry. 201 False otherwise. 202 203 """ 204 return (result.test_executed 205 and result.id in self._retry_map 206 and (self._retry_map[result.id]['state'] == self.States.RETRIED 207 or self._should_retry(result))) 208 209 210 def _should_retry(self, result): 211 """Check whether we should retry a job based on its result. 212 213 We will retry the job that corresponds to the result 214 when all of the following are true. 215 a) The test was actually executed, meaning that if 216 a job was aborted before it could ever reach the state 217 of 'Running', the job will not be retried. 218 b) The result is worse than |self._retry_level| which 219 defaults to 'WARN'. 220 c) The test requires retry, i.e. the job has an entry in the retry map. 221 d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED 222 Note that if a test has JOB_RETRIES=5, and the second time 223 it was retried it hit an rpc error, we will give up on 224 all following retries. 225 e) The job has not reached its retry max, i.e. retry_max > 0 226 227 @param result: A result, encapsulating the status of the job. 228 229 @returns: True if we should retry the job. 230 231 """ 232 return ( 233 result.test_executed 234 and result.id in self._retry_map 235 and not self._suite_max_reached() 236 and result.is_worse_than( 237 job_status.Status(self._retry_level, '', 'reason')) 238 and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED 239 and self._retry_map[result.id]['retry_max'] > 0 240 ) 241 242 def _should_retry_local_job(self, job_id): 243 """Check whether we should retry a job based on information available 244 for a local job without a Result object. 245 246 We will retry the job that corresponds to the result 247 when all of the following are true. 248 a) The test requires retry, i.e. the job has an entry in the retry map. 249 b) We haven't made any retry attempt yet for this job, i.e. 250 state == NOT_ATTEMPTED 251 If the job is aborted, we will give up on all following retries, 252 regardless of max_retries. 253 c) The job has not reached its retry max, i.e. retry_max > 0 254 255 @param job_id: the id for the job, to look up relevant information. 256 257 @returns: True if we should retry the job. 258 259 """ 260 if self._suite_max_reached(): 261 logging.debug('suite max_retries reached, not retrying.') 262 return False 263 if job_id not in self._retry_map: 264 logging.debug('job_id not in retry map, not retrying.') 265 return False 266 if self._retry_map[job_id]['state'] != self.States.NOT_ATTEMPTED: 267 logging.debug("job state was %s not 'Not Attempted', not retrying", 268 self._retry_map[job_id]['state']) 269 return False 270 if self._retry_map[job_id]['retry_max'] <= 0: 271 logging.debug('test-level retries exhausted, not retrying') 272 return False 273 return True 274 275 276 def job_present(self, job_id): 277 """Check whether a job id present in the retry map. 278 279 @param job_id: afe_job_id of a job. 280 281 @returns: A True if the job is present, False if not. 282 """ 283 return bool(self._retry_map.get(job_id)) 284 285 286 287 def get_retry_max(self, job_id): 288 """Get the maximum times the job can still be retried. 289 290 @param job_id: afe_job_id of a job. 291 292 @returns: An int, representing the maximum times the job can still be 293 retried. 294 @raises KeyError if job_id isn't in the retry map. 295 296 """ 297 return self._retry_map[job_id]['retry_max'] 298 299 300class _SuiteChildJobCreator(object): 301 """Create test jobs for a suite.""" 302 303 def __init__( 304 self, 305 tag, 306 builds, 307 board, 308 afe=None, 309 max_runtime_mins=24*60, 310 timeout_mins=24*60, 311 suite_job_id=None, 312 ignore_deps=False, 313 extra_deps=(), 314 priority=priorities.Priority.DEFAULT, 315 offload_failures_only=False, 316 test_source_build=None, 317 job_keyvals=None, 318 ): 319 """ 320 Constructor 321 322 @param tag: a string with which to tag jobs run in this suite. 323 @param builds: the builds on which we're running this suite. 324 @param board: the board on which we're running this suite. 325 @param afe: an instance of AFE as defined in server/frontend.py. 326 @param max_runtime_mins: Maximum suite runtime, in minutes. 327 @param timeout_mins: Maximum job lifetime, in minutes. 328 @param suite_job_id: Job id that will act as parent id to all sub jobs. 329 Default: None 330 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 331 attribute and skip applying of dependency labels. 332 (Default:False) 333 @param extra_deps: A list of strings which are the extra DEPENDENCIES 334 to add to each test being scheduled. 335 @param priority: Integer priority level. Higher is more important. 336 @param offload_failures_only: Only enable gs_offloading for failed 337 jobs. 338 @param test_source_build: Build that contains the server-side test code. 339 @param job_keyvals: General job keyvals to be inserted into keyval file, 340 which will be used by tko/parse later. 341 """ 342 self._tag = tag 343 self._builds = builds 344 self._board = board 345 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30, 346 delay_sec=10, 347 debug=False) 348 self._max_runtime_mins = max_runtime_mins 349 self._timeout_mins = timeout_mins 350 self._suite_job_id = suite_job_id 351 self._ignore_deps = ignore_deps 352 self._extra_deps = tuple(extra_deps) 353 self._priority = priority 354 self._offload_failures_only = offload_failures_only 355 self._test_source_build = test_source_build 356 self._job_keyvals = job_keyvals 357 358 359 @property 360 def cros_build(self): 361 """Return the CrOS build or the first build in the builds dict.""" 362 # TODO(ayatane): Note that the builds dict isn't ordered. I'm not 363 # sure what the implications of this are, but it's probably not a 364 # good thing. 365 return self._builds.get(provision.CROS_VERSION_PREFIX, 366 self._builds.values()[0]) 367 368 369 def create_job(self, test, retry_for=None): 370 """ 371 Thin wrapper around frontend.AFE.create_job(). 372 373 @param test: ControlData object for a test to run. 374 @param retry_for: If the to-be-created job is a retry for an 375 old job, the afe_job_id of the old job will 376 be passed in as |retry_for|, which will be 377 recorded in the new job's keyvals. 378 @returns: A frontend.Job object with an added test_name member. 379 test_name is used to preserve the higher level TEST_NAME 380 name of the job. 381 """ 382 # For a system running multiple suites which share tests, the priority 383 # overridden may lead to unexpected scheduling order that adds extra 384 # provision jobs. 385 test_priority = self._priority 386 if utils.is_moblab(): 387 test_priority = max(self._priority, test.priority) 388 389 reboot_before = (model_attributes.RebootBefore.NEVER if test.fast 390 else None) 391 392 test_obj = self._afe.create_job( 393 control_file=test.text, 394 name=tools.create_job_name( 395 self._test_source_build or self.cros_build, 396 self._tag, 397 test.name), 398 control_type=test.test_type.capitalize(), 399 meta_hosts=[self._board]*test.sync_count, 400 dependencies=self._create_job_deps(test), 401 keyvals=self._create_keyvals_for_test_job(test, retry_for), 402 max_runtime_mins=self._max_runtime_mins, 403 timeout_mins=self._timeout_mins, 404 parent_job_id=self._suite_job_id, 405 reboot_before=reboot_before, 406 run_reset=not test.fast, 407 priority=test_priority, 408 synch_count=test.sync_count, 409 require_ssp=test.require_ssp) 410 411 test_obj.test_name = test.name 412 return test_obj 413 414 415 def _create_job_deps(self, test): 416 """Create job deps list for a test job. 417 418 @returns: A list of dependency strings. 419 """ 420 if self._ignore_deps: 421 job_deps = [] 422 else: 423 job_deps = list(test.dependencies) 424 job_deps.extend(self._extra_deps) 425 return job_deps 426 427 428 def _create_keyvals_for_test_job(self, test, retry_for=None): 429 """Create keyvals dict for creating a test job. 430 431 @param test: ControlData object for a test to run. 432 @param retry_for: If the to-be-created job is a retry for an 433 old job, the afe_job_id of the old job will 434 be passed in as |retry_for|, which will be 435 recorded in the new job's keyvals. 436 @returns: A keyvals dict for creating the test job. 437 """ 438 keyvals = { 439 constants.JOB_BUILD_KEY: self.cros_build, 440 constants.JOB_SUITE_KEY: self._tag, 441 constants.JOB_EXPERIMENTAL_KEY: test.experimental, 442 constants.JOB_BUILDS_KEY: self._builds 443 } 444 # test_source_build is saved to job_keyvals so scheduler can retrieve 445 # the build name from database when compiling autoserv commandline. 446 # This avoid a database change to add a new field in afe_jobs. 447 # 448 # Only add `test_source_build` to job keyvals if the build is different 449 # from the CrOS build or the job uses more than one build, e.g., both 450 # firmware and CrOS will be updated in the dut. 451 # This is for backwards compatibility, so the update Autotest code can 452 # compile an autoserv command line to run in a SSP container using 453 # previous builds. 454 if (self._test_source_build and 455 (self.cros_build != self._test_source_build or 456 len(self._builds) > 1)): 457 keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = \ 458 self._test_source_build 459 for prefix, build in self._builds.iteritems(): 460 if prefix == provision.FW_RW_VERSION_PREFIX: 461 keyvals[constants.FWRW_BUILD]= build 462 elif prefix == provision.FW_RO_VERSION_PREFIX: 463 keyvals[constants.FWRO_BUILD] = build 464 # Add suite job id to keyvals so tko parser can read it from keyval 465 # file. 466 if self._suite_job_id: 467 keyvals[constants.PARENT_JOB_ID] = self._suite_job_id 468 # We drop the old job's id in the new job's keyval file so that 469 # later our tko parser can figure out the retry relationship and 470 # invalidate the results of the old job in tko database. 471 if retry_for: 472 keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for 473 if self._offload_failures_only: 474 keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True 475 if self._job_keyvals: 476 for key in constants.INHERITED_KEYVALS: 477 if key in self._job_keyvals: 478 keyvals[key] = self._job_keyvals[key] 479 return keyvals 480 481 482class _ControlFileRetriever(object): 483 """Retrieves control files. 484 485 This returns control data instances, unlike control file getters 486 which simply return the control file text contents. 487 """ 488 489 def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False, 490 test_args=None): 491 """Initialize instance. 492 493 @param cf_getter: a control_file_getter.ControlFileGetter used to list 494 and fetch the content of control files 495 @param forgiving_parser: If False, will raise ControlVariableExceptions 496 if any are encountered when parsing control 497 files. Note that this can raise an exception 498 for syntax errors in unrelated files, because 499 we parse them before applying the predicate. 500 @param run_prod_code: If true, the retrieved tests will run the test 501 code that lives in prod aka the test code 502 currently on the lab servers by disabling 503 SSP for the discovered tests. 504 @param test_args: A dict of args to be seeded in test control file under 505 the name |args_dict|. 506 """ 507 self._cf_getter = cf_getter 508 self._forgiving_parser = forgiving_parser 509 self._run_prod_code = run_prod_code 510 self._test_args = test_args 511 512 513 def retrieve_for_test(self, test_name): 514 """Retrieve a test's control data. 515 516 This ignores forgiving_parser because we cannot return a 517 forgiving value. 518 519 @param test_name: Name of test to retrieve. 520 521 @raises ControlVariableException: There is a syntax error in a 522 control file. 523 524 @returns a ControlData object 525 """ 526 return suite_common.retrieve_control_data_for_test( 527 self._cf_getter, test_name) 528 529 530 def retrieve_for_suite(self, suite_name=''): 531 """Scan through all tests and find all tests. 532 533 @param suite_name: If specified, this method will attempt to restrain 534 the search space to just this suite's control files. 535 536 @raises ControlVariableException: If forgiving_parser is False and there 537 is a syntax error in a control file. 538 539 @returns a dictionary of ControlData objects that based on given 540 parameters. 541 """ 542 tests = suite_common.retrieve_for_suite( 543 self._cf_getter, suite_name, self._forgiving_parser, 544 self._test_args) 545 if self._run_prod_code: 546 for test in tests.itervalues(): 547 test.require_ssp = False 548 549 return tests 550 551 552def list_all_suites(build, devserver, cf_getter=None): 553 """ 554 Parses all ControlData objects with a SUITE tag and extracts all 555 defined suite names. 556 557 @param build: the build on which we're running this suite. 558 @param devserver: the devserver which contains the build. 559 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 560 using DevServerGetter. 561 562 @return list of suites 563 """ 564 if cf_getter is None: 565 cf_getter = _create_ds_getter(build, devserver) 566 567 suites = set() 568 predicate = lambda t: True 569 for test in find_and_parse_tests(cf_getter, predicate): 570 suites.update(test.suite_tag_parts) 571 return list(suites) 572 573 574def test_file_similarity_predicate(test_file_pattern): 575 """Returns predicate that gets the similarity based on a test's file 576 name pattern. 577 578 Builds a predicate that takes in a parsed control file (a ControlData) 579 and returns a tuple of (file path, ratio), where ratio is the 580 similarity between the test file name and the given test_file_pattern. 581 582 @param test_file_pattern: regular expression (string) to match against 583 control file names. 584 @return a callable that takes a ControlData and and returns a tuple of 585 (file path, ratio), where ratio is the similarity between the 586 test file name and the given test_file_pattern. 587 """ 588 return lambda t: ((None, 0) if not hasattr(t, 'path') else 589 (t.path, difflib.SequenceMatcher(a=t.path, 590 b=test_file_pattern).ratio())) 591 592 593def test_name_similarity_predicate(test_name): 594 """Returns predicate that matched based on a test's name. 595 596 Builds a predicate that takes in a parsed control file (a ControlData) 597 and returns a tuple of (test name, ratio), where ratio is the similarity 598 between the test name and the given test_name. 599 600 @param test_name: the test name to base the predicate on. 601 @return a callable that takes a ControlData and returns a tuple of 602 (test name, ratio), where ratio is the similarity between the 603 test name and the given test_name. 604 """ 605 return lambda t: ((None, 0) if not hasattr(t, 'name') else 606 (t.name, 607 difflib.SequenceMatcher(a=t.name, b=test_name).ratio())) 608 609 610def matches_attribute_expression_predicate(test_attr_boolstr): 611 """Returns predicate that matches based on boolean expression of 612 attributes. 613 614 Builds a predicate that takes in a parsed control file (a ControlData) 615 ans returns True if the test attributes satisfy the given attribute 616 boolean expression. 617 618 @param test_attr_boolstr: boolean expression of the attributes to be 619 test, like 'system:all and interval:daily'. 620 621 @return a callable that takes a ControlData and returns True if the test 622 attributes satisfy the given boolean expression. 623 """ 624 return lambda t: boolparse_lib.BoolstrResult( 625 test_attr_boolstr, t.attributes) 626 627 628def test_file_matches_pattern_predicate(test_file_pattern): 629 """Returns predicate that matches based on a test's file name pattern. 630 631 Builds a predicate that takes in a parsed control file (a ControlData) 632 and returns True if the test's control file name matches the given 633 regular expression. 634 635 @param test_file_pattern: regular expression (string) to match against 636 control file names. 637 @return a callable that takes a ControlData and and returns 638 True if control file name matches the pattern. 639 """ 640 return lambda t: hasattr(t, 'path') and re.match(test_file_pattern, 641 t.path) 642 643 644def test_name_matches_pattern_predicate(test_name_pattern): 645 """Returns predicate that matches based on a test's name pattern. 646 647 Builds a predicate that takes in a parsed control file (a ControlData) 648 and returns True if the test name matches the given regular expression. 649 650 @param test_name_pattern: regular expression (string) to match against 651 test names. 652 @return a callable that takes a ControlData and returns 653 True if the name fields matches the pattern. 654 """ 655 return lambda t: hasattr(t, 'name') and re.match(test_name_pattern, 656 t.name) 657 658 659def test_name_equals_predicate(test_name): 660 """Returns predicate that matched based on a test's name. 661 662 Builds a predicate that takes in a parsed control file (a ControlData) 663 and returns True if the test name is equal to |test_name|. 664 665 @param test_name: the test name to base the predicate on. 666 @return a callable that takes a ControlData and looks for |test_name| 667 in that ControlData's name. 668 """ 669 return lambda t: hasattr(t, 'name') and test_name == t.name 670 671 672def name_in_tag_similarity_predicate(name): 673 """Returns predicate that takes a control file and gets the similarity 674 of the suites in the control file and the given name. 675 676 Builds a predicate that takes in a parsed control file (a ControlData) 677 and returns a list of tuples of (suite name, ratio), where suite name 678 is each suite listed in the control file, and ratio is the similarity 679 between each suite and the given name. 680 681 @param name: the suite name to base the predicate on. 682 @return a callable that takes a ControlData and returns a list of tuples 683 of (suite name, ratio), where suite name is each suite listed in 684 the control file, and ratio is the similarity between each suite 685 and the given name. 686 """ 687 return lambda t: [(suite, 688 difflib.SequenceMatcher(a=suite, b=name).ratio()) 689 for suite in t.suite_tag_parts] or [(None, 0)] 690 691 692def name_in_tag_predicate(name): 693 """Returns predicate that takes a control file and looks for |name|. 694 695 Builds a predicate that takes in a parsed control file (a ControlData) 696 and returns True if the SUITE tag is present and contains |name|. 697 698 @param name: the suite name to base the predicate on. 699 @return a callable that takes a ControlData and looks for |name| in that 700 ControlData object's suite member. 701 """ 702 return suite_common.name_in_tag_predicate(name) 703 704 705def create_fs_getter(autotest_dir): 706 """ 707 @param autotest_dir: the place to find autotests. 708 @return a FileSystemGetter instance that looks under |autotest_dir|. 709 """ 710 # currently hard-coded places to look for tests. 711 subpaths = ['server/site_tests', 'client/site_tests', 712 'server/tests', 'client/tests'] 713 directories = [os.path.join(autotest_dir, p) for p in subpaths] 714 return control_file_getter.FileSystemGetter(directories) 715 716 717def _create_ds_getter(build, devserver): 718 """ 719 @param build: the build on which we're running this suite. 720 @param devserver: the devserver which contains the build. 721 @return a FileSystemGetter instance that looks under |autotest_dir|. 722 """ 723 return control_file_getter.DevServerGetter(build, devserver) 724 725 726def _non_experimental_tests_predicate(test_data): 727 """Test predicate for non-experimental tests.""" 728 return not test_data.experimental 729 730 731def find_and_parse_tests(cf_getter, predicate, suite_name='', 732 add_experimental=False, forgiving_parser=True, 733 run_prod_code=False, test_args=None): 734 """ 735 Function to scan through all tests and find eligible tests. 736 737 Search through all tests based on given cf_getter, suite_name, 738 add_experimental and forgiving_parser, return the tests that match 739 given predicate. 740 741 @param cf_getter: a control_file_getter.ControlFileGetter used to list 742 and fetch the content of control files 743 @param predicate: a function that should return True when run over a 744 ControlData representation of a control file that should be in 745 this Suite. 746 @param suite_name: If specified, this method will attempt to restrain 747 the search space to just this suite's control files. 748 @param add_experimental: add tests with experimental attribute set. 749 @param forgiving_parser: If False, will raise ControlVariableExceptions 750 if any are encountered when parsing control 751 files. Note that this can raise an exception 752 for syntax errors in unrelated files, because 753 we parse them before applying the predicate. 754 @param run_prod_code: If true, the suite will run the test code that 755 lives in prod aka the test code currently on the 756 lab servers by disabling SSP for the discovered 757 tests. 758 @param test_args: A dict of args to be seeded in test control file. 759 760 @raises ControlVariableException: If forgiving_parser is False and there 761 is a syntax error in a control file. 762 763 @return list of ControlData objects that should be run, with control 764 file text added in |text| attribute. Results are sorted based 765 on the TIME setting in control file, slowest test comes first. 766 """ 767 logging.debug('Getting control file list for suite: %s', suite_name) 768 retriever = _ControlFileRetriever(cf_getter, 769 forgiving_parser=forgiving_parser, 770 run_prod_code=run_prod_code, 771 test_args=test_args) 772 tests = retriever.retrieve_for_suite(suite_name) 773 if not add_experimental: 774 predicate = _ComposedPredicate([predicate, 775 _non_experimental_tests_predicate]) 776 return suite_common.filter_tests(tests, predicate) 777 778 779def find_possible_tests(cf_getter, predicate, suite_name='', count=10): 780 """ 781 Function to scan through all tests and find possible tests. 782 783 Search through all tests based on given cf_getter, suite_name, 784 add_experimental and forgiving_parser. Use the given predicate to 785 calculate the similarity and return the top 10 matches. 786 787 @param cf_getter: a control_file_getter.ControlFileGetter used to list 788 and fetch the content of control files 789 @param predicate: a function that should return a tuple of (name, ratio) 790 when run over a ControlData representation of a control file that 791 should be in this Suite. `name` is the key to be compared, e.g., 792 a suite name or test name. `ratio` is a value between [0,1] 793 indicating the similarity of `name` and the value to be compared. 794 @param suite_name: If specified, this method will attempt to restrain 795 the search space to just this suite's control files. 796 @param count: Number of suggestions to return, default to 10. 797 798 @return list of top names that similar to the given test, sorted by 799 match ratio. 800 """ 801 logging.debug('Getting control file list for suite: %s', suite_name) 802 tests = _ControlFileRetriever(cf_getter).retrieve_for_suite(suite_name) 803 logging.debug('Parsed %s control files.', len(tests)) 804 similarities = {} 805 for test in tests.itervalues(): 806 ratios = predicate(test) 807 # Some predicates may return a list of tuples, e.g., 808 # name_in_tag_similarity_predicate. Convert all returns to a list. 809 if not isinstance(ratios, list): 810 ratios = [ratios] 811 for name, ratio in ratios: 812 similarities[name] = ratio 813 return [s[0] for s in 814 sorted(similarities.items(), key=operator.itemgetter(1), 815 reverse=True)][:count] 816 817 818def _deprecated_suite_method(func): 819 """Decorator for deprecated Suite static methods. 820 821 TODO(ayatane): This is used to decorate functions that are called as 822 static methods on Suite. 823 """ 824 @functools.wraps(func) 825 def wrapper(*args, **kwargs): 826 """Wraps |func| for warning.""" 827 warnings.warn('Calling method "%s" from Suite is deprecated' % 828 func.__name__) 829 return func(*args, **kwargs) 830 return staticmethod(wrapper) 831 832 833class _BaseSuite(object): 834 """ 835 A suite of tests, defined by some predicate over control file variables. 836 837 Given a place to search for control files a predicate to match the desired 838 tests, can gather tests and fire off jobs to run them, and then wait for 839 results. 840 841 @var _predicate: a function that should return True when run over a 842 ControlData representation of a control file that should be in 843 this Suite. 844 @var _tag: a string with which to tag jobs run in this suite. 845 @var _builds: the builds on which we're running this suite. 846 @var _afe: an instance of AFE as defined in server/frontend.py. 847 @var _tko: an instance of TKO as defined in server/frontend.py. 848 @var _jobs: currently scheduled jobs, if any. 849 @var _jobs_to_tests: a dictionary that maps job ids to tests represented 850 ControlData objects. 851 @var _retry: a bool value indicating whether jobs should be retried on 852 failure. 853 @var _retry_handler: a RetryHandler object. 854 855 """ 856 857 858 def __init__( 859 self, 860 tests, 861 tag, 862 builds, 863 board, 864 afe=None, 865 tko=None, 866 pool=None, 867 results_dir=None, 868 max_runtime_mins=24*60, 869 timeout_mins=24*60, 870 file_bugs=False, 871 suite_job_id=None, 872 ignore_deps=False, 873 extra_deps=None, 874 priority=priorities.Priority.DEFAULT, 875 wait_for_results=True, 876 job_retry=False, 877 max_retries=sys.maxint, 878 offload_failures_only=False, 879 test_source_build=None, 880 job_keyvals=None, 881 child_dependencies=(), 882 result_reporter=None, 883 ): 884 """Initialize instance. 885 886 @param tests: Iterable of tests to run. 887 @param tag: a string with which to tag jobs run in this suite. 888 @param builds: the builds on which we're running this suite. 889 @param board: the board on which we're running this suite. 890 @param afe: an instance of AFE as defined in server/frontend.py. 891 @param tko: an instance of TKO as defined in server/frontend.py. 892 @param pool: Specify the pool of machines to use for scheduling 893 purposes. 894 @param results_dir: The directory where the job can write results to. 895 This must be set if you want job_id of sub-jobs 896 list in the job keyvals. 897 @param max_runtime_mins: Maximum suite runtime, in minutes. 898 @param timeout: Maximum job lifetime, in hours. 899 @param suite_job_id: Job id that will act as parent id to all sub jobs. 900 Default: None 901 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 902 attribute and skip applying of dependency labels. 903 (Default:False) 904 @param extra_deps: A list of strings which are the extra DEPENDENCIES 905 to add to each test being scheduled. 906 @param priority: Integer priority level. Higher is more important. 907 @param wait_for_results: Set to False to run the suite job without 908 waiting for test jobs to finish. Default is 909 True. 910 @param job_retry: A bool value indicating whether jobs should be retried 911 on failure. If True, the field 'JOB_RETRIES' in 912 control files will be respected. If False, do not 913 retry. 914 @param max_retries: Maximum retry limit at suite level. 915 Regardless how many times each individual test 916 has been retried, the total number of retries 917 happening in the suite can't exceed _max_retries. 918 Default to sys.maxint. 919 @param offload_failures_only: Only enable gs_offloading for failed 920 jobs. 921 @param test_source_build: Build that contains the server-side test code. 922 @param job_keyvals: General job keyvals to be inserted into keyval file, 923 which will be used by tko/parse later. 924 @param child_dependencies: (optional) list of dependency strings 925 to be added as dependencies to child jobs. 926 @param result_reporter: A _ResultReporter instance to report results. If 927 None, an _EmailReporter will be created. 928 """ 929 930 self.tests = list(tests) 931 self._tag = tag 932 self._builds = builds 933 self._results_dir = results_dir 934 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30, 935 delay_sec=10, 936 debug=False) 937 self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30, 938 delay_sec=10, 939 debug=False) 940 self._jobs = [] 941 self._jobs_to_tests = {} 942 943 self._file_bugs = file_bugs 944 self._suite_job_id = suite_job_id 945 self._job_retry=job_retry 946 self._max_retries = max_retries 947 # RetryHandler to be initialized in schedule() 948 self._retry_handler = None 949 self.wait_for_results = wait_for_results 950 self._job_keyvals = job_keyvals 951 if result_reporter is None: 952 self._result_reporter = _EmailReporter(self) 953 else: 954 self._result_reporter = result_reporter 955 956 if extra_deps is None: 957 extra_deps = [] 958 extra_deps.append(board) 959 if pool: 960 extra_deps.append(pool) 961 extra_deps.extend(child_dependencies) 962 self._dependencies = tuple(extra_deps) 963 964 self._job_creator = _SuiteChildJobCreator( 965 tag=tag, 966 builds=builds, 967 board=board, 968 afe=afe, 969 max_runtime_mins=max_runtime_mins, 970 timeout_mins=timeout_mins, 971 suite_job_id=suite_job_id, 972 ignore_deps=ignore_deps, 973 extra_deps=extra_deps, 974 priority=priority, 975 offload_failures_only=offload_failures_only, 976 test_source_build=test_source_build, 977 job_keyvals=job_keyvals, 978 ) 979 980 981 def _schedule_test(self, record, test, retry_for=None): 982 """Schedule a single test and return the job. 983 984 Schedule a single test by creating a job, and then update relevant 985 data structures that are used to keep track of all running jobs. 986 987 Emits a TEST_NA status log entry if it failed to schedule the test due 988 to NoEligibleHostException or a non-existent board label. 989 990 Returns a frontend.Job object if the test is successfully scheduled. 991 If scheduling failed due to NoEligibleHostException or a non-existent 992 board label, returns None. 993 994 @param record: A callable to use for logging. 995 prototype: record(base_job.status_log_entry) 996 @param test: ControlData for a test to run. 997 @param retry_for: If we are scheduling a test to retry an 998 old job, the afe_job_id of the old job 999 will be passed in as |retry_for|. 1000 1001 @returns: A frontend.Job object or None 1002 """ 1003 msg = 'Scheduling %s' % test.name 1004 if retry_for: 1005 msg = msg + ', to retry afe job %d' % retry_for 1006 logging.debug(msg) 1007 begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT) 1008 try: 1009 job = self._job_creator.create_job(test, retry_for=retry_for) 1010 except (error.NoEligibleHostException, proxy.ValidationError) as e: 1011 if (isinstance(e, error.NoEligibleHostException) 1012 or (isinstance(e, proxy.ValidationError) 1013 and _is_nonexistent_board_error(e))): 1014 # Treat a dependency on a non-existent board label the same as 1015 # a dependency on a board that exists, but for which there's no 1016 # hardware. 1017 logging.debug('%s not applicable for this board/pool. ' 1018 'Emitting TEST_NA.', test.name) 1019 Status('TEST_NA', test.name, 1020 'Skipping: test not supported on this board/pool.', 1021 begin_time_str=begin_time_str).record_all(record) 1022 return None 1023 else: 1024 raise e 1025 except (error.RPCException, proxy.JSONRPCException): 1026 if retry_for: 1027 # Mark that we've attempted to retry the old job. 1028 logging.debug("RPC exception occurred") 1029 self._retry_handler.set_attempted(job_id=retry_for) 1030 raise 1031 else: 1032 self._jobs.append(job) 1033 self._jobs_to_tests[job.id] = test 1034 if retry_for: 1035 # A retry job was just created, record it. 1036 self._retry_handler.add_retry( 1037 old_job_id=retry_for, new_job_id=job.id) 1038 retry_count = (test.job_retries - 1039 self._retry_handler.get_retry_max(job.id)) 1040 logging.debug('Job %d created to retry job %d. ' 1041 'Have retried for %d time(s)', 1042 job.id, retry_for, retry_count) 1043 self._remember_job_keyval(job) 1044 return job 1045 1046 def schedule(self, record): 1047 """ 1048 Schedule jobs using |self._afe|. 1049 1050 frontend.Job objects representing each scheduled job will be put in 1051 |self._jobs|. 1052 1053 @param record: A callable to use for logging. 1054 prototype: record(base_job.status_log_entry) 1055 @returns: The number of tests that were scheduled. 1056 """ 1057 scheduled_test_names = [] 1058 logging.debug('Discovered %d tests.', len(self.tests)) 1059 1060 Status('INFO', 'Start %s' % self._tag).record_result(record) 1061 try: 1062 # Write job_keyvals into keyval file. 1063 if self._job_keyvals: 1064 utils.write_keyval(self._results_dir, self._job_keyvals) 1065 1066 # TODO(crbug.com/730885): This is a hack to protect tests that are 1067 # not usually retried from getting hit by a provision error when run 1068 # as part of a suite. Remove this hack once provision is separated 1069 # out in its own suite. 1070 self._bump_up_test_retries(self.tests) 1071 for test in self.tests: 1072 scheduled_job = self._schedule_test(record, test) 1073 if scheduled_job is not None: 1074 scheduled_test_names.append(test.name) 1075 1076 # Write the num of scheduled tests and name of them to keyval file. 1077 logging.debug('Scheduled %d tests, writing the total to keyval.', 1078 len(scheduled_test_names)) 1079 utils.write_keyval( 1080 self._results_dir, 1081 self._make_scheduled_tests_keyvals(scheduled_test_names)) 1082 except Exception: 1083 logging.exception('Exception while scheduling suite') 1084 Status('FAIL', self._tag, 1085 'Exception while scheduling suite').record_result(record) 1086 1087 if self._job_retry: 1088 logging.debug("Initializing RetryHandler for suite %s.", self._tag) 1089 self._retry_handler = RetryHandler( 1090 initial_jobs_to_tests=self._jobs_to_tests, 1091 max_retries=self._max_retries) 1092 logging.debug("retry map created: %s ", 1093 self._retry_handler._retry_map) 1094 else: 1095 logging.info("Will not retry jobs from suite %s.", self._tag) 1096 return len(scheduled_test_names) 1097 1098 1099 def _bump_up_test_retries(self, tests): 1100 """Bump up individual test retries to match suite retry options.""" 1101 if not self._job_retry: 1102 return 1103 1104 for test in tests: 1105 # We do honor if a test insists on JOB_RETRIES = 0. 1106 if test.job_retries is None: 1107 logging.debug( 1108 'Test %s did not request retries, but suite requires ' 1109 'retries. Bumping retries up to 1. ' 1110 '(See crbug.com/730885)', 1111 test.name) 1112 test.job_retries = 1 1113 1114 1115 def _make_scheduled_tests_keyvals(self, scheduled_test_names): 1116 """Make a keyvals dict to write for scheduled test names. 1117 1118 @param scheduled_test_names: A list of scheduled test name strings. 1119 1120 @returns: A keyvals dict. 1121 """ 1122 return { 1123 constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names), 1124 constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names), 1125 } 1126 1127 1128 def _should_report(self, result): 1129 """ 1130 Returns True if this failure requires to be reported. 1131 1132 @param result: A result, encapsulating the status of the failed job. 1133 @return: True if we should report this failure. 1134 """ 1135 return (self._file_bugs and result.test_executed and 1136 not result.is_testna() and 1137 result.is_worse_than(job_status.Status('GOOD', '', 'reason'))) 1138 1139 1140 def _has_retry(self, result): 1141 """ 1142 Return True if this result gets to retry. 1143 1144 @param result: A result, encapsulating the status of the failed job. 1145 @return: bool 1146 """ 1147 return (self._job_retry 1148 and self._retry_handler.has_following_retry(result)) 1149 1150 1151 def wait(self, record): 1152 """ 1153 Polls for the job statuses, using |record| to print status when each 1154 completes. 1155 1156 @param record: callable that records job status. 1157 prototype: 1158 record(base_job.status_log_entry) 1159 """ 1160 waiter = job_status.JobResultWaiter(self._afe, self._tko) 1161 try: 1162 if self._suite_job_id: 1163 jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id) 1164 else: 1165 logging.warning('Unknown suite_job_id, falling back to less ' 1166 'efficient results_generator.') 1167 jobs = self._jobs 1168 waiter.add_jobs(jobs) 1169 for result in waiter.wait_for_results(): 1170 self._handle_result(result=result, record=record, waiter=waiter) 1171 if self._finished_waiting(): 1172 break 1173 except Exception: # pylint: disable=W0703 1174 logging.exception('Exception waiting for results') 1175 Status('FAIL', self._tag, 1176 'Exception waiting for results').record_result(record) 1177 1178 1179 def _finished_waiting(self): 1180 """Return whether the suite is finished waiting for child jobs.""" 1181 return False 1182 1183 1184 def _handle_result(self, result, record, waiter): 1185 """ 1186 Handle a test job result. 1187 1188 @param result: Status instance for job. 1189 @param record: callable that records job status. 1190 prototype: 1191 record(base_job.status_log_entry) 1192 @param waiter: JobResultsWaiter instance. 1193 1194 @instance_param _result_reporter: _ResultReporter instance. 1195 """ 1196 self._record_result(result, record) 1197 rescheduled = False 1198 if self._job_retry and self._retry_handler._should_retry(result): 1199 rescheduled = self._retry_result(result, record, waiter) 1200 # TODO (crbug.com/751428): If the suite times out before a retry could 1201 # finish, we would lose the chance to report errors from the original 1202 # job. 1203 if self._has_retry(result) and rescheduled: 1204 return 1205 1206 if self._should_report(result): 1207 self._result_reporter.report(result) 1208 1209 def _record_result(self, result, record): 1210 """ 1211 Record a test job result. 1212 1213 @param result: Status instance for job. 1214 @param record: callable that records job status. 1215 prototype: 1216 record(base_job.status_log_entry) 1217 """ 1218 result.record_all(record) 1219 self._remember_job_keyval(result) 1220 1221 1222 def _retry_result(self, result, record, waiter): 1223 """ 1224 Retry a test job result. 1225 1226 @param result: Status instance for job. 1227 @param record: callable that records job status. 1228 prototype: 1229 record(base_job.status_log_entry) 1230 @param waiter: JobResultsWaiter instance. 1231 @returns: True if a job was scheduled for retry, False otherwise. 1232 """ 1233 test = self._jobs_to_tests[result.id] 1234 try: 1235 # It only takes effect for CQ retriable job: 1236 # 1) in first try, test.fast=True. 1237 # 2) in second try, test will be run in normal mode, so reset 1238 # test.fast=False. 1239 test.fast = False 1240 new_job = self._schedule_test( 1241 record=record, test=test, retry_for=result.id) 1242 except (error.RPCException, proxy.JSONRPCException) as e: 1243 logging.error('Failed to schedule test: %s, Reason: %s', 1244 test.name, e) 1245 return False 1246 else: 1247 waiter.add_job(new_job) 1248 return bool(new_job) 1249 1250 @property 1251 def jobs(self): 1252 """Give a copy of the associated jobs 1253 1254 @returns: array of jobs""" 1255 return [job for job in self._jobs] 1256 1257 1258 @property 1259 def _should_file_bugs(self): 1260 """Return whether bugs should be filed. 1261 1262 @returns: bool 1263 """ 1264 # File bug when failure is one of the _FILE_BUG_SUITES, 1265 # otherwise send an email to the owner anc cc. 1266 return self._tag in _FILE_BUG_SUITES 1267 1268 1269 def abort(self): 1270 """ 1271 Abort all scheduled test jobs. 1272 """ 1273 if self._jobs: 1274 job_ids = [job.id for job in self._jobs] 1275 self._afe.run('abort_host_queue_entries', job__id__in=job_ids) 1276 1277 1278 def _remember_job_keyval(self, job): 1279 """ 1280 Record provided job as a suite job keyval, for later referencing. 1281 1282 @param job: some representation of a job that has the attributes: 1283 id, test_name, and owner 1284 """ 1285 if self._results_dir and job.id and job.owner and job.test_name: 1286 job_id_owner = '%s-%s' % (job.id, job.owner) 1287 logging.debug('Adding job keyval for %s=%s', 1288 job.test_name, job_id_owner) 1289 utils.write_keyval( 1290 self._results_dir, 1291 {hashlib.md5(job.test_name).hexdigest(): job_id_owner}) 1292 1293 1294class Suite(_BaseSuite): 1295 """ 1296 A suite of tests, defined by some predicate over control file variables. 1297 1298 Given a place to search for control files a predicate to match the desired 1299 tests, can gather tests and fire off jobs to run them, and then wait for 1300 results. 1301 1302 @var _predicate: a function that should return True when run over a 1303 ControlData representation of a control file that should be in 1304 this Suite. 1305 @var _tag: a string with which to tag jobs run in this suite. 1306 @var _builds: the builds on which we're running this suite. 1307 @var _afe: an instance of AFE as defined in server/frontend.py. 1308 @var _tko: an instance of TKO as defined in server/frontend.py. 1309 @var _jobs: currently scheduled jobs, if any. 1310 @var _jobs_to_tests: a dictionary that maps job ids to tests represented 1311 ControlData objects. 1312 @var _cf_getter: a control_file_getter.ControlFileGetter 1313 @var _retry: a bool value indicating whether jobs should be retried on 1314 failure. 1315 @var _retry_handler: a RetryHandler object. 1316 1317 """ 1318 1319 # TODO(ayatane): These methods are kept on the Suite class for 1320 # backward compatibility. 1321 find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests) 1322 find_possible_tests = _deprecated_suite_method(find_possible_tests) 1323 create_fs_getter = _deprecated_suite_method(create_fs_getter) 1324 name_in_tag_predicate = _deprecated_suite_method( 1325 suite_common.name_in_tag_predicate) 1326 name_in_tag_similarity_predicate = _deprecated_suite_method( 1327 name_in_tag_similarity_predicate) 1328 test_name_equals_predicate = _deprecated_suite_method( 1329 test_name_equals_predicate) 1330 test_name_in_list_predicate = _deprecated_suite_method( 1331 suite_common.test_name_in_list_predicate) 1332 test_name_matches_pattern_predicate = _deprecated_suite_method( 1333 test_name_matches_pattern_predicate) 1334 test_file_matches_pattern_predicate = _deprecated_suite_method( 1335 test_file_matches_pattern_predicate) 1336 matches_attribute_expression_predicate = _deprecated_suite_method( 1337 matches_attribute_expression_predicate) 1338 test_name_similarity_predicate = _deprecated_suite_method( 1339 test_name_similarity_predicate) 1340 test_file_similarity_predicate = _deprecated_suite_method( 1341 test_file_similarity_predicate) 1342 list_all_suites = _deprecated_suite_method(list_all_suites) 1343 get_test_source_build = _deprecated_suite_method( 1344 suite_common.get_test_source_build) 1345 1346 1347 @classmethod 1348 def create_from_predicates(cls, predicates, builds, board, devserver, 1349 cf_getter=None, name='ad_hoc_suite', 1350 run_prod_code=False, **dargs): 1351 """ 1352 Create a Suite using a given predicate test filters. 1353 1354 Uses supplied predicate(s) to instantiate a Suite. Looks for tests in 1355 |autotest_dir| and will schedule them using |afe|. Pulls control files 1356 from the default dev server. Results will be pulled from |tko| upon 1357 completion. 1358 1359 @param predicates: A list of callables that accept ControlData 1360 representations of control files. A test will be 1361 included in suite if all callables in this list 1362 return True on the given control file. 1363 @param builds: the builds on which we're running this suite. It's a 1364 dictionary of version_prefix:build. 1365 @param board: the board on which we're running this suite. 1366 @param devserver: the devserver which contains the build. 1367 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 1368 using DevServerGetter. 1369 @param name: name of suite. Defaults to 'ad_hoc_suite' 1370 @param run_prod_code: If true, the suite will run the tests that 1371 lives in prod aka the test code currently on the 1372 lab servers. 1373 @param **dargs: Any other Suite constructor parameters, as described 1374 in Suite.__init__ docstring. 1375 @return a Suite instance. 1376 """ 1377 if cf_getter is None: 1378 if run_prod_code: 1379 cf_getter = create_fs_getter(_AUTOTEST_DIR) 1380 else: 1381 build = suite_common.get_test_source_build(builds, **dargs) 1382 cf_getter = _create_ds_getter(build, devserver) 1383 1384 return cls(predicates, 1385 name, builds, board, cf_getter, run_prod_code, **dargs) 1386 1387 1388 @classmethod 1389 def create_from_name(cls, name, builds, board, devserver, cf_getter=None, 1390 **dargs): 1391 """ 1392 Create a Suite using a predicate based on the SUITE control file var. 1393 1394 Makes a predicate based on |name| and uses it to instantiate a Suite 1395 that looks for tests in |autotest_dir| and will schedule them using 1396 |afe|. Pulls control files from the default dev server. 1397 Results will be pulled from |tko| upon completion. 1398 1399 @param name: a value of the SUITE control file variable to search for. 1400 @param builds: the builds on which we're running this suite. It's a 1401 dictionary of version_prefix:build. 1402 @param board: the board on which we're running this suite. 1403 @param devserver: the devserver which contains the build. 1404 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 1405 using DevServerGetter. 1406 @param **dargs: Any other Suite constructor parameters, as described 1407 in Suite.__init__ docstring. 1408 @return a Suite instance. 1409 """ 1410 if cf_getter is None: 1411 build = suite_common.get_test_source_build(builds, **dargs) 1412 cf_getter = _create_ds_getter(build, devserver) 1413 1414 return cls([suite_common.name_in_tag_predicate(name)], 1415 name, builds, board, cf_getter, **dargs) 1416 1417 1418 def __init__( 1419 self, 1420 predicates, 1421 tag, 1422 builds, 1423 board, 1424 cf_getter, 1425 run_prod_code=False, 1426 afe=None, 1427 tko=None, 1428 pool=None, 1429 results_dir=None, 1430 max_runtime_mins=24*60, 1431 timeout_mins=24*60, 1432 file_bugs=False, 1433 suite_job_id=None, 1434 ignore_deps=False, 1435 extra_deps=None, 1436 priority=priorities.Priority.DEFAULT, 1437 forgiving_parser=True, 1438 wait_for_results=True, 1439 job_retry=False, 1440 max_retries=sys.maxint, 1441 offload_failures_only=False, 1442 test_source_build=None, 1443 job_keyvals=None, 1444 test_args=None, 1445 child_dependencies=(), 1446 result_reporter=None, 1447 ): 1448 """ 1449 Constructor 1450 1451 @param predicates: A list of callables that accept ControlData 1452 representations of control files. A test will be 1453 included in suite if all callables in this list 1454 return True on the given control file. 1455 @param tag: a string with which to tag jobs run in this suite. 1456 @param builds: the builds on which we're running this suite. 1457 @param board: the board on which we're running this suite. 1458 @param cf_getter: a control_file_getter.ControlFileGetter 1459 @param afe: an instance of AFE as defined in server/frontend.py. 1460 @param tko: an instance of TKO as defined in server/frontend.py. 1461 @param pool: Specify the pool of machines to use for scheduling 1462 purposes. 1463 @param run_prod_code: If true, the suite will run the test code that 1464 lives in prod aka the test code currently on the 1465 lab servers. 1466 @param results_dir: The directory where the job can write results to. 1467 This must be set if you want job_id of sub-jobs 1468 list in the job keyvals. 1469 @param max_runtime_mins: Maximum suite runtime, in minutes. 1470 @param timeout: Maximum job lifetime, in hours. 1471 @param suite_job_id: Job id that will act as parent id to all sub jobs. 1472 Default: None 1473 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 1474 attribute and skip applying of dependency labels. 1475 (Default:False) 1476 @param extra_deps: A list of strings which are the extra DEPENDENCIES 1477 to add to each test being scheduled. 1478 @param priority: Integer priority level. Higher is more important. 1479 @param wait_for_results: Set to False to run the suite job without 1480 waiting for test jobs to finish. Default is 1481 True. 1482 @param job_retry: A bool value indicating whether jobs should be retried 1483 on failure. If True, the field 'JOB_RETRIES' in 1484 control files will be respected. If False, do not 1485 retry. 1486 @param max_retries: Maximum retry limit at suite level. 1487 Regardless how many times each individual test 1488 has been retried, the total number of retries 1489 happening in the suite can't exceed _max_retries. 1490 Default to sys.maxint. 1491 @param offload_failures_only: Only enable gs_offloading for failed 1492 jobs. 1493 @param test_source_build: Build that contains the server-side test code. 1494 @param job_keyvals: General job keyvals to be inserted into keyval file, 1495 which will be used by tko/parse later. 1496 @param test_args: A dict of args passed all the way to each individual 1497 test that will be actually ran. 1498 @param child_dependencies: (optional) list of dependency strings 1499 to be added as dependencies to child jobs. 1500 @param result_reporter: A _ResultReporter instance to report results. If 1501 None, an _EmailReporter will be created. 1502 """ 1503 tests = find_and_parse_tests( 1504 cf_getter, 1505 _ComposedPredicate(predicates), 1506 tag, 1507 forgiving_parser=forgiving_parser, 1508 run_prod_code=run_prod_code, 1509 test_args=test_args, 1510 ) 1511 super(Suite, self).__init__( 1512 tests=tests, 1513 tag=tag, 1514 builds=builds, 1515 board=board, 1516 afe=afe, 1517 tko=tko, 1518 pool=pool, 1519 results_dir=results_dir, 1520 max_runtime_mins=max_runtime_mins, 1521 timeout_mins=timeout_mins, 1522 file_bugs=file_bugs, 1523 suite_job_id=suite_job_id, 1524 ignore_deps=ignore_deps, 1525 extra_deps=extra_deps, 1526 priority=priority, 1527 wait_for_results=wait_for_results, 1528 job_retry=job_retry, 1529 max_retries=max_retries, 1530 offload_failures_only=offload_failures_only, 1531 test_source_build=test_source_build, 1532 job_keyvals=job_keyvals, 1533 child_dependencies=child_dependencies, 1534 result_reporter=result_reporter, 1535 ) 1536 1537 1538class ProvisionSuite(_BaseSuite): 1539 """ 1540 A suite for provisioning DUTs. 1541 1542 This is done by creating dummy_Pass tests. 1543 """ 1544 1545 1546 def __init__( 1547 self, 1548 tag, 1549 builds, 1550 board, 1551 devserver, 1552 num_required, 1553 num_max=float('inf'), 1554 cf_getter=None, 1555 run_prod_code=False, 1556 test_args=None, 1557 test_source_build=None, 1558 **kwargs): 1559 """ 1560 Constructor 1561 1562 @param tag: a string with which to tag jobs run in this suite. 1563 @param builds: the builds on which we're running this suite. 1564 @param board: the board on which we're running this suite. 1565 @param devserver: the devserver which contains the build. 1566 @param num_required: number of tests that must pass. This is 1567 capped by the number of tests that are run. 1568 @param num_max: max number of tests to make. By default there 1569 is no cap, a test is created for each eligible host. 1570 @param cf_getter: a control_file_getter.ControlFileGetter. 1571 @param test_args: A dict of args passed all the way to each individual 1572 test that will be actually ran. 1573 @param test_source_build: Build that contains the server-side test code. 1574 @param kwargs: Various keyword arguments passed to 1575 _BaseSuite constructor. 1576 """ 1577 super(ProvisionSuite, self).__init__( 1578 tests=[], 1579 tag=tag, 1580 builds=builds, 1581 board=board, 1582 **kwargs) 1583 self._num_successful = 0 1584 self._num_required = 0 1585 self.tests = [] 1586 1587 static_deps = [dep for dep in self._dependencies 1588 if not provision.Provision.acts_on(dep)] 1589 if 'pool:suites' in static_deps: 1590 logging.info('Provision suite is disabled on suites pool') 1591 return 1592 logging.debug('Looking for hosts matching %r', static_deps) 1593 hosts = self._afe.get_hosts( 1594 invalid=False, multiple_labels=static_deps) 1595 logging.debug('Found %d matching hosts for ProvisionSuite', len(hosts)) 1596 available_hosts = [h for h in hosts if h.is_available()] 1597 logging.debug('Found %d available hosts for ProvisionSuite', 1598 len(available_hosts)) 1599 dummy_test = _load_dummy_test( 1600 builds, devserver, cf_getter, 1601 run_prod_code, test_args, test_source_build) 1602 self.tests = [dummy_test] * min(len(available_hosts), num_max) 1603 logging.debug('Made %d tests for ProvisionSuite', len(self.tests)) 1604 self._num_required = min(num_required, len(self.tests)) 1605 logging.debug('Expecting %d tests to pass for ProvisionSuite', 1606 self._num_required) 1607 1608 def _handle_result(self, result, record, waiter): 1609 super(ProvisionSuite, self)._handle_result(result, record, waiter) 1610 if result.is_good(): 1611 self._num_successful += 1 1612 1613 def _finished_waiting(self): 1614 return self._num_successful >= self._num_required 1615 1616 1617def _load_dummy_test( 1618 builds, 1619 devserver, 1620 cf_getter=None, 1621 run_prod_code=False, 1622 test_args=None, 1623 test_source_build=None): 1624 """ 1625 Load and return the dummy pass test. 1626 1627 @param builds: the builds on which we're running this suite. 1628 @param devserver: the devserver which contains the build. 1629 @param cf_getter: a control_file_getter.ControlFileGetter. 1630 @param test_args: A dict of args passed all the way to each individual 1631 test that will be actually ran. 1632 @param test_source_build: Build that contains the server-side test code. 1633 """ 1634 if cf_getter is None: 1635 if run_prod_code: 1636 cf_getter = create_fs_getter(_AUTOTEST_DIR) 1637 else: 1638 build = suite_common.get_test_source_build( 1639 builds, test_source_build=test_source_build) 1640 devserver.stage_artifacts(image=build, 1641 artifacts=['control_files']) 1642 cf_getter = _create_ds_getter(build, devserver) 1643 retriever = _ControlFileRetriever(cf_getter, 1644 run_prod_code=run_prod_code, 1645 test_args=test_args) 1646 return retriever.retrieve_for_test('dummy_Pass') 1647 1648 1649class _ComposedPredicate(object): 1650 """Return the composition of the predicates. 1651 1652 Predicates are functions that take a test control data object and 1653 return True of that test is to be included. The returned 1654 predicate's set is the intersection of all of the input predicates' 1655 sets (it returns True if all predicates return True). 1656 """ 1657 1658 def __init__(self, predicates): 1659 """Initialize instance. 1660 1661 @param predicates: Iterable of predicates. 1662 """ 1663 self._predicates = list(predicates) 1664 1665 def __repr__(self): 1666 return '{cls}({this._predicates!r})'.format( 1667 cls=type(self).__name__, 1668 this=self, 1669 ) 1670 1671 def __call__(self, control_data_): 1672 return all(f(control_data_) for f in self._predicates) 1673 1674 1675def _is_nonexistent_board_error(e): 1676 """Return True if error is caused by nonexistent board label. 1677 1678 As of this writing, the particular case we want looks like this: 1679 1680 1) e.problem_keys is a dictionary 1681 2) e.problem_keys['meta_hosts'] exists as the only key 1682 in the dictionary. 1683 3) e.problem_keys['meta_hosts'] matches this pattern: 1684 "Label "board:.*" not found" 1685 1686 We check for conditions 1) and 2) on the 1687 theory that they're relatively immutable. 1688 We don't check condition 3) because it seems 1689 likely to be a maintenance burden, and for the 1690 times when we're wrong, being right shouldn't 1691 matter enough (we _hope_). 1692 1693 @param e: proxy.ValidationError instance 1694 @returns: boolean 1695 """ 1696 return (isinstance(e.problem_keys, dict) 1697 and len(e.problem_keys) == 1 1698 and 'meta_hosts' in e.problem_keys) 1699 1700 1701class _ResultReporter(object): 1702 """Abstract base class for reporting test results. 1703 1704 Usually, this is used to report test failures. 1705 """ 1706 1707 __metaclass__ = abc.ABCMeta 1708 1709 @abc.abstractmethod 1710 def report(self, result): 1711 """Report test result. 1712 1713 @param result: Status instance for job. 1714 """ 1715 1716 1717class _EmailReporter(_ResultReporter): 1718 """Class that emails based on test failures.""" 1719 1720 def __init__(self, suite, bug_template=None): 1721 self._suite = suite 1722 self._bug_template = bug_template or {} 1723 1724 def _get_test_bug(self, result): 1725 """Get TestBug for the given result. 1726 1727 @param result: Status instance for a test job. 1728 @returns: TestBug instance. 1729 """ 1730 # reporting modules have dependency on external packages, e.g., httplib2 1731 # Such dependency can cause issue to any module tries to import suite.py 1732 # without building site-packages first. Since the reporting modules are 1733 # only used in this function, move the imports here avoid the 1734 # requirement of building site packages to use other functions in this 1735 # module. 1736 from autotest_lib.server.cros.dynamic_suite import reporting 1737 1738 job_views = self._suite._tko.run('get_detailed_test_views', 1739 afe_job_id=result.id) 1740 return reporting.TestBug(self._suite._job_creator.cros_build, 1741 utils.get_chrome_version(job_views), 1742 self._suite._tag, 1743 result) 1744 1745 def _get_bug_template(self, result): 1746 """Get BugTemplate for test job. 1747 1748 @param result: Status instance for job. 1749 @param bug_template: A template dictionary specifying the default bug 1750 filing options for failures in this suite. 1751 @returns: BugTemplate instance 1752 """ 1753 # reporting modules have dependency on external packages, e.g., httplib2 1754 # Such dependency can cause issue to any module tries to import suite.py 1755 # without building site-packages first. Since the reporting modules are 1756 # only used in this function, move the imports here avoid the 1757 # requirement of building site packages to use other functions in this 1758 # module. 1759 from autotest_lib.server.cros.dynamic_suite import reporting_utils 1760 1761 # Try to merge with bug template in test control file. 1762 template = reporting_utils.BugTemplate(self._bug_template) 1763 try: 1764 test_data = self._suite._jobs_to_tests[result.id] 1765 return template.finalize_bug_template( 1766 test_data.bug_template) 1767 except AttributeError: 1768 # Test control file does not have bug template defined. 1769 return template.bug_template 1770 except reporting_utils.InvalidBugTemplateException as e: 1771 logging.error('Merging bug templates failed with ' 1772 'error: %s An empty bug template will ' 1773 'be used.', e) 1774 return {} 1775 1776 def report(self, result): 1777 # reporting modules have dependency on external 1778 # packages, e.g., httplib2 Such dependency can cause 1779 # issue to any module tries to import suite.py without 1780 # building site-packages first. Since the reporting 1781 # modules are only used in this function, move the 1782 # imports here avoid the requirement of building site 1783 # packages to use other functions in this module. 1784 from autotest_lib.server.cros.dynamic_suite import reporting 1785 1786 reporting.send_email( 1787 self._get_test_bug(result), 1788 self._get_bug_template(result)) 1789