1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import abc 6import datetime 7import difflib 8import functools 9import hashlib 10import logging 11import operator 12import os 13import re 14import sys 15import warnings 16 17import common 18 19from autotest_lib.frontend.afe.json_rpc import proxy 20from autotest_lib.client.common_lib import enum 21from autotest_lib.client.common_lib import error 22from autotest_lib.client.common_lib import global_config 23from autotest_lib.client.common_lib import priorities 24from autotest_lib.client.common_lib import time_utils 25from autotest_lib.client.common_lib import utils 26from autotest_lib.frontend.afe import model_attributes 27from autotest_lib.frontend.afe.json_rpc import proxy 28from autotest_lib.server.cros import provision 29from autotest_lib.server.cros.dynamic_suite import constants 30from autotest_lib.server.cros.dynamic_suite import control_file_getter 31from autotest_lib.server.cros.dynamic_suite import frontend_wrappers 32from autotest_lib.server.cros.dynamic_suite import job_status 33from autotest_lib.server.cros.dynamic_suite import suite_common 34from autotest_lib.server.cros.dynamic_suite import tools 35from autotest_lib.server.cros.dynamic_suite.job_status import Status 36 37try: 38 from chromite.lib import boolparse_lib 39 from chromite.lib import cros_logging as logging 40except ImportError: 41 print 'Unable to import chromite.' 42 print 'This script must be either:' 43 print ' - Be run in the chroot.' 44 print ' - (not yet supported) be run after running ' 45 print ' ../utils/build_externals.py' 46 47_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta', 48 'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable', 49 'sanity', 'push_to_prod'] 50_AUTOTEST_DIR = global_config.global_config.get_config_value( 51 'SCHEDULER', 'drone_installation_directory') 52 53 54class RetryHandler(object): 55 """Maintain retry information. 56 57 @var _retry_map: A dictionary that stores retry history. 58 The key is afe job id. The value is a dictionary. 59 {job_id: {'state':RetryHandler.States, 'retry_max':int}} 60 - state: 61 The retry state of a job. 62 NOT_ATTEMPTED: 63 We haven't done anything about the job. 64 ATTEMPTED: 65 We've made an attempt to schedule a retry job. The 66 scheduling may or may not be successful, e.g. 67 it might encounter an rpc error. Note failure 68 in scheduling a retry is different from a retry job failure. 69 For each job, we only attempt to schedule a retry once. 70 For example, assume we have a test with JOB_RETRIES=5 and 71 its second retry job failed. When we attempt to create 72 a third retry job to retry the second, we hit an rpc 73 error. In such case, we will give up on all following 74 retries. 75 RETRIED: 76 A retry job has already been successfully 77 scheduled. 78 - retry_max: 79 The maximum of times the job can still 80 be retried, taking into account retries 81 that have occurred. 82 @var _retry_level: A retry might be triggered only if the result 83 is worse than the level. 84 @var _max_retries: Maximum retry limit at suite level. 85 Regardless how many times each individual test 86 has been retried, the total number of retries happening in 87 the suite can't exceed _max_retries. 88 """ 89 90 States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED', 91 start_value=1, step=1) 92 93 def __init__(self, initial_jobs_to_tests, retry_level='WARN', 94 max_retries=None): 95 """Initialize RetryHandler. 96 97 @param initial_jobs_to_tests: A dictionary that maps a job id to 98 a ControlData object. This dictionary should contain 99 jobs that are originally scheduled by the suite. 100 @param retry_level: A retry might be triggered only if the result is 101 worse than the level. 102 @param max_retries: Integer, maxmium total retries allowed 103 for the suite. Default to None, no max. 104 """ 105 self._retry_map = {} 106 self._retry_level = retry_level 107 self._max_retries = (max_retries 108 if max_retries is not None else sys.maxint) 109 for job_id, test in initial_jobs_to_tests.items(): 110 if test.job_retries > 0: 111 self._add_job(new_job_id=job_id, 112 retry_max=test.job_retries) 113 else: 114 logging.debug("Test %s has no retries", test.name) 115 116 117 def _add_job(self, new_job_id, retry_max): 118 """Add a newly-created job to the retry map. 119 120 @param new_job_id: The afe_job_id of a newly created job. 121 @param retry_max: The maximum of times that we could retry 122 the test if the job fails. 123 124 @raises ValueError if new_job_id is already in retry map. 125 126 """ 127 if new_job_id in self._retry_map: 128 raise ValueError('add_job called when job is already in retry map.') 129 130 self._retry_map[new_job_id] = { 131 'state': self.States.NOT_ATTEMPTED, 132 'retry_max': retry_max} 133 134 135 def _suite_max_reached(self): 136 """Return whether maximum retry limit for a suite has been reached.""" 137 return self._max_retries <= 0 138 139 140 def add_retry(self, old_job_id, new_job_id): 141 """Record a retry. 142 143 Update retry map with the retry information. 144 145 @param old_job_id: The afe_job_id of the job that is retried. 146 @param new_job_id: The afe_job_id of the retry job. 147 148 @raises KeyError if old_job_id isn't in the retry map. 149 @raises ValueError if we have already retried or made an attempt 150 to retry the old job. 151 152 """ 153 old_record = self._retry_map[old_job_id] 154 if old_record['state'] != self.States.NOT_ATTEMPTED: 155 raise ValueError( 156 'We have already retried or attempted to retry job %d' % 157 old_job_id) 158 old_record['state'] = self.States.RETRIED 159 self._add_job(new_job_id=new_job_id, 160 retry_max=old_record['retry_max'] - 1) 161 self._max_retries -= 1 162 163 164 def set_attempted(self, job_id): 165 """Set the state of the job to ATTEMPTED. 166 167 @param job_id: afe_job_id of a job. 168 169 @raises KeyError if job_id isn't in the retry map. 170 @raises ValueError if the current state is not NOT_ATTEMPTED. 171 172 """ 173 current_state = self._retry_map[job_id]['state'] 174 if current_state != self.States.NOT_ATTEMPTED: 175 # We are supposed to retry or attempt to retry each job 176 # only once. Raise an error if this is not the case. 177 raise ValueError('Unexpected state transition: %s -> %s' % 178 (self.States.get_string(current_state), 179 self.States.get_string(self.States.ATTEMPTED))) 180 else: 181 self._retry_map[job_id]['state'] = self.States.ATTEMPTED 182 183 184 def has_following_retry(self, result): 185 """Check whether there will be a following retry. 186 187 We have the following cases for a given job id (result.id), 188 - no retry map entry -> retry not required, no following retry 189 - has retry map entry: 190 - already retried -> has following retry 191 - has not retried 192 (this branch can be handled by checking should_retry(result)) 193 - retry_max == 0 --> the last retry job, no more retry 194 - retry_max > 0 195 - attempted, but has failed in scheduling a 196 following retry due to rpc error --> no more retry 197 - has not attempped --> has following retry if test failed. 198 199 @param result: A result, encapsulating the status of the job. 200 201 @returns: True, if there will be a following retry. 202 False otherwise. 203 204 """ 205 return (result.test_executed 206 and result.id in self._retry_map 207 and (self._retry_map[result.id]['state'] == self.States.RETRIED 208 or self._should_retry(result))) 209 210 211 def _should_retry(self, result): 212 """Check whether we should retry a job based on its result. 213 214 We will retry the job that corresponds to the result 215 when all of the following are true. 216 a) The test was actually executed, meaning that if 217 a job was aborted before it could ever reach the state 218 of 'Running', the job will not be retried. 219 b) The result is worse than |self._retry_level| which 220 defaults to 'WARN'. 221 c) The test requires retry, i.e. the job has an entry in the retry map. 222 d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED 223 Note that if a test has JOB_RETRIES=5, and the second time 224 it was retried it hit an rpc error, we will give up on 225 all following retries. 226 e) The job has not reached its retry max, i.e. retry_max > 0 227 228 @param result: A result, encapsulating the status of the job. 229 230 @returns: True if we should retry the job. 231 232 """ 233 return ( 234 result.test_executed 235 and result.id in self._retry_map 236 and not self._suite_max_reached() 237 and result.is_worse_than( 238 job_status.Status(self._retry_level, '', 'reason')) 239 and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED 240 and self._retry_map[result.id]['retry_max'] > 0 241 ) 242 243 def _should_retry_local_job(self, job_id): 244 """Check whether we should retry a job based on information available 245 for a local job without a Result object. 246 247 We will retry the job that corresponds to the result 248 when all of the following are true. 249 a) The test requires retry, i.e. the job has an entry in the retry map. 250 b) We haven't made any retry attempt yet for this job, i.e. 251 state == NOT_ATTEMPTED 252 If the job is aborted, we will give up on all following retries, 253 regardless of max_retries. 254 c) The job has not reached its retry max, i.e. retry_max > 0 255 256 @param job_id: the id for the job, to look up relevant information. 257 258 @returns: True if we should retry the job. 259 260 """ 261 if self._suite_max_reached(): 262 logging.debug('suite max_retries reached, not retrying.') 263 return False 264 if job_id not in self._retry_map: 265 logging.debug('job_id not in retry map, not retrying.') 266 return False 267 if self._retry_map[job_id]['state'] != self.States.NOT_ATTEMPTED: 268 logging.debug("job state was %s not 'Not Attempted', not retrying", 269 self._retry_map[job_id]['state']) 270 return False 271 if self._retry_map[job_id]['retry_max'] <= 0: 272 logging.debug('test-level retries exhausted, not retrying') 273 return False 274 return True 275 276 277 def job_present(self, job_id): 278 """Check whether a job id present in the retry map. 279 280 @param job_id: afe_job_id of a job. 281 282 @returns: A True if the job is present, False if not. 283 """ 284 return bool(self._retry_map.get(job_id)) 285 286 287 288 def get_retry_max(self, job_id): 289 """Get the maximum times the job can still be retried. 290 291 @param job_id: afe_job_id of a job. 292 293 @returns: An int, representing the maximum times the job can still be 294 retried. 295 @raises KeyError if job_id isn't in the retry map. 296 297 """ 298 return self._retry_map[job_id]['retry_max'] 299 300 301class _SuiteChildJobCreator(object): 302 """Create test jobs for a suite.""" 303 304 def __init__( 305 self, 306 tag, 307 builds, 308 board, 309 afe=None, 310 max_runtime_mins=24*60, 311 timeout_mins=24*60, 312 suite_job_id=None, 313 ignore_deps=False, 314 extra_deps=(), 315 priority=priorities.Priority.DEFAULT, 316 offload_failures_only=False, 317 test_source_build=None, 318 job_keyvals=None, 319 ): 320 """ 321 Constructor 322 323 @param tag: a string with which to tag jobs run in this suite. 324 @param builds: the builds on which we're running this suite. 325 @param board: the board on which we're running this suite. 326 @param afe: an instance of AFE as defined in server/frontend.py. 327 @param max_runtime_mins: Maximum suite runtime, in minutes. 328 @param timeout_mins: Maximum job lifetime, in minutes. 329 @param suite_job_id: Job id that will act as parent id to all sub jobs. 330 Default: None 331 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 332 attribute and skip applying of dependency labels. 333 (Default:False) 334 @param extra_deps: A list of strings which are the extra DEPENDENCIES 335 to add to each test being scheduled. 336 @param priority: Integer priority level. Higher is more important. 337 @param offload_failures_only: Only enable gs_offloading for failed 338 jobs. 339 @param test_source_build: Build that contains the server-side test code. 340 @param job_keyvals: General job keyvals to be inserted into keyval file, 341 which will be used by tko/parse later. 342 """ 343 self._tag = tag 344 self._builds = builds 345 self._board = board 346 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30, 347 delay_sec=10, 348 debug=False) 349 self._max_runtime_mins = max_runtime_mins 350 self._timeout_mins = timeout_mins 351 self._suite_job_id = suite_job_id 352 self._ignore_deps = ignore_deps 353 self._extra_deps = tuple(extra_deps) 354 self._priority = priority 355 self._offload_failures_only = offload_failures_only 356 self._test_source_build = test_source_build 357 self._job_keyvals = job_keyvals 358 359 360 @property 361 def cros_build(self): 362 """Return the CrOS build or the first build in the builds dict.""" 363 # TODO(ayatane): Note that the builds dict isn't ordered. I'm not 364 # sure what the implications of this are, but it's probably not a 365 # good thing. 366 return self._builds.get(provision.CROS_VERSION_PREFIX, 367 self._builds.values()[0]) 368 369 370 def create_job(self, test, retry_for=None): 371 """ 372 Thin wrapper around frontend.AFE.create_job(). 373 374 @param test: ControlData object for a test to run. 375 @param retry_for: If the to-be-created job is a retry for an 376 old job, the afe_job_id of the old job will 377 be passed in as |retry_for|, which will be 378 recorded in the new job's keyvals. 379 @returns: A frontend.Job object with an added test_name member. 380 test_name is used to preserve the higher level TEST_NAME 381 name of the job. 382 """ 383 # For a system running multiple suites which share tests, the priority 384 # overridden may lead to unexpected scheduling order that adds extra 385 # provision jobs. 386 test_priority = self._priority 387 if utils.is_moblab(): 388 test_priority = max(self._priority, test.priority) 389 390 reboot_before = (model_attributes.RebootBefore.NEVER if test.fast 391 else None) 392 393 test_obj = self._afe.create_job( 394 control_file=test.text, 395 name=tools.create_job_name( 396 self._test_source_build or self.cros_build, 397 self._tag, 398 test.name), 399 control_type=test.test_type.capitalize(), 400 meta_hosts=[self._board]*test.sync_count, 401 dependencies=self._create_job_deps(test), 402 keyvals=self._create_keyvals_for_test_job(test, retry_for), 403 max_runtime_mins=self._max_runtime_mins, 404 timeout_mins=self._timeout_mins, 405 parent_job_id=self._suite_job_id, 406 reboot_before=reboot_before, 407 run_reset=not test.fast, 408 priority=test_priority, 409 synch_count=test.sync_count, 410 require_ssp=test.require_ssp) 411 412 test_obj.test_name = test.name 413 return test_obj 414 415 416 def _create_job_deps(self, test): 417 """Create job deps list for a test job. 418 419 @returns: A list of dependency strings. 420 """ 421 if self._ignore_deps: 422 job_deps = [] 423 else: 424 job_deps = list(test.dependencies) 425 job_deps.extend(self._extra_deps) 426 return job_deps 427 428 429 def _create_keyvals_for_test_job(self, test, retry_for=None): 430 """Create keyvals dict for creating a test job. 431 432 @param test: ControlData object for a test to run. 433 @param retry_for: If the to-be-created job is a retry for an 434 old job, the afe_job_id of the old job will 435 be passed in as |retry_for|, which will be 436 recorded in the new job's keyvals. 437 @returns: A keyvals dict for creating the test job. 438 """ 439 keyvals = { 440 constants.JOB_BUILD_KEY: self.cros_build, 441 constants.JOB_SUITE_KEY: self._tag, 442 constants.JOB_EXPERIMENTAL_KEY: test.experimental, 443 constants.JOB_BUILDS_KEY: self._builds 444 } 445 # test_source_build is saved to job_keyvals so scheduler can retrieve 446 # the build name from database when compiling autoserv commandline. 447 # This avoid a database change to add a new field in afe_jobs. 448 # 449 # Only add `test_source_build` to job keyvals if the build is different 450 # from the CrOS build or the job uses more than one build, e.g., both 451 # firmware and CrOS will be updated in the dut. 452 # This is for backwards compatibility, so the update Autotest code can 453 # compile an autoserv command line to run in a SSP container using 454 # previous builds. 455 if (self._test_source_build and 456 (self.cros_build != self._test_source_build or 457 len(self._builds) > 1)): 458 keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = \ 459 self._test_source_build 460 for prefix, build in self._builds.iteritems(): 461 if prefix == provision.FW_RW_VERSION_PREFIX: 462 keyvals[constants.FWRW_BUILD]= build 463 elif prefix == provision.FW_RO_VERSION_PREFIX: 464 keyvals[constants.FWRO_BUILD] = build 465 # Add suite job id to keyvals so tko parser can read it from keyval 466 # file. 467 if self._suite_job_id: 468 keyvals[constants.PARENT_JOB_ID] = self._suite_job_id 469 # We drop the old job's id in the new job's keyval file so that 470 # later our tko parser can figure out the retry relationship and 471 # invalidate the results of the old job in tko database. 472 if retry_for: 473 keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for 474 if self._offload_failures_only: 475 keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True 476 if self._job_keyvals: 477 for key in constants.INHERITED_KEYVALS: 478 if key in self._job_keyvals: 479 keyvals[key] = self._job_keyvals[key] 480 return keyvals 481 482 483class _ControlFileRetriever(object): 484 """Retrieves control files. 485 486 This returns control data instances, unlike control file getters 487 which simply return the control file text contents. 488 """ 489 490 def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False, 491 test_args=None): 492 """Initialize instance. 493 494 @param cf_getter: a control_file_getter.ControlFileGetter used to list 495 and fetch the content of control files 496 @param forgiving_parser: If False, will raise ControlVariableExceptions 497 if any are encountered when parsing control 498 files. Note that this can raise an exception 499 for syntax errors in unrelated files, because 500 we parse them before applying the predicate. 501 @param run_prod_code: If true, the retrieved tests will run the test 502 code that lives in prod aka the test code 503 currently on the lab servers by disabling 504 SSP for the discovered tests. 505 @param test_args: A dict of args to be seeded in test control file under 506 the name |args_dict|. 507 """ 508 self._cf_getter = cf_getter 509 self._forgiving_parser = forgiving_parser 510 self._run_prod_code = run_prod_code 511 self._test_args = test_args 512 513 514 def retrieve_for_test(self, test_name): 515 """Retrieve a test's control data. 516 517 This ignores forgiving_parser because we cannot return a 518 forgiving value. 519 520 @param test_name: Name of test to retrieve. 521 522 @raises ControlVariableException: There is a syntax error in a 523 control file. 524 525 @returns a ControlData object 526 """ 527 return suite_common.retrieve_control_data_for_test( 528 self._cf_getter, test_name) 529 530 531 def retrieve_for_suite(self, suite_name=''): 532 """Scan through all tests and find all tests. 533 534 @param suite_name: If specified, this method will attempt to restrain 535 the search space to just this suite's control files. 536 537 @raises ControlVariableException: If forgiving_parser is False and there 538 is a syntax error in a control file. 539 540 @returns a dictionary of ControlData objects that based on given 541 parameters. 542 """ 543 tests = suite_common.retrieve_for_suite( 544 self._cf_getter, suite_name, self._forgiving_parser, 545 self._test_args) 546 if self._run_prod_code: 547 for test in tests.itervalues(): 548 test.require_ssp = False 549 550 return tests 551 552 553def list_all_suites(build, devserver, cf_getter=None): 554 """ 555 Parses all ControlData objects with a SUITE tag and extracts all 556 defined suite names. 557 558 @param build: the build on which we're running this suite. 559 @param devserver: the devserver which contains the build. 560 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 561 using DevServerGetter. 562 563 @return list of suites 564 """ 565 if cf_getter is None: 566 cf_getter = _create_ds_getter(build, devserver) 567 568 suites = set() 569 predicate = lambda t: True 570 for test in find_and_parse_tests(cf_getter, predicate): 571 suites.update(test.suite_tag_parts) 572 return list(suites) 573 574 575def test_file_similarity_predicate(test_file_pattern): 576 """Returns predicate that gets the similarity based on a test's file 577 name pattern. 578 579 Builds a predicate that takes in a parsed control file (a ControlData) 580 and returns a tuple of (file path, ratio), where ratio is the 581 similarity between the test file name and the given test_file_pattern. 582 583 @param test_file_pattern: regular expression (string) to match against 584 control file names. 585 @return a callable that takes a ControlData and and returns a tuple of 586 (file path, ratio), where ratio is the similarity between the 587 test file name and the given test_file_pattern. 588 """ 589 return lambda t: ((None, 0) if not hasattr(t, 'path') else 590 (t.path, difflib.SequenceMatcher(a=t.path, 591 b=test_file_pattern).ratio())) 592 593 594def test_name_similarity_predicate(test_name): 595 """Returns predicate that matched based on a test's name. 596 597 Builds a predicate that takes in a parsed control file (a ControlData) 598 and returns a tuple of (test name, ratio), where ratio is the similarity 599 between the test name and the given test_name. 600 601 @param test_name: the test name to base the predicate on. 602 @return a callable that takes a ControlData and returns a tuple of 603 (test name, ratio), where ratio is the similarity between the 604 test name and the given test_name. 605 """ 606 return lambda t: ((None, 0) if not hasattr(t, 'name') else 607 (t.name, 608 difflib.SequenceMatcher(a=t.name, b=test_name).ratio())) 609 610 611def matches_attribute_expression_predicate(test_attr_boolstr): 612 """Returns predicate that matches based on boolean expression of 613 attributes. 614 615 Builds a predicate that takes in a parsed control file (a ControlData) 616 ans returns True if the test attributes satisfy the given attribute 617 boolean expression. 618 619 @param test_attr_boolstr: boolean expression of the attributes to be 620 test, like 'system:all and interval:daily'. 621 622 @return a callable that takes a ControlData and returns True if the test 623 attributes satisfy the given boolean expression. 624 """ 625 return lambda t: boolparse_lib.BoolstrResult( 626 test_attr_boolstr, t.attributes) 627 628 629def test_file_matches_pattern_predicate(test_file_pattern): 630 """Returns predicate that matches based on a test's file name pattern. 631 632 Builds a predicate that takes in a parsed control file (a ControlData) 633 and returns True if the test's control file name matches the given 634 regular expression. 635 636 @param test_file_pattern: regular expression (string) to match against 637 control file names. 638 @return a callable that takes a ControlData and and returns 639 True if control file name matches the pattern. 640 """ 641 return lambda t: hasattr(t, 'path') and re.match(test_file_pattern, 642 t.path) 643 644 645def test_name_matches_pattern_predicate(test_name_pattern): 646 """Returns predicate that matches based on a test's name pattern. 647 648 Builds a predicate that takes in a parsed control file (a ControlData) 649 and returns True if the test name matches the given regular expression. 650 651 @param test_name_pattern: regular expression (string) to match against 652 test names. 653 @return a callable that takes a ControlData and returns 654 True if the name fields matches the pattern. 655 """ 656 return lambda t: hasattr(t, 'name') and re.match(test_name_pattern, 657 t.name) 658 659 660def test_name_equals_predicate(test_name): 661 """Returns predicate that matched based on a test's name. 662 663 Builds a predicate that takes in a parsed control file (a ControlData) 664 and returns True if the test name is equal to |test_name|. 665 666 @param test_name: the test name to base the predicate on. 667 @return a callable that takes a ControlData and looks for |test_name| 668 in that ControlData's name. 669 """ 670 return lambda t: hasattr(t, 'name') and test_name == t.name 671 672 673def name_in_tag_similarity_predicate(name): 674 """Returns predicate that takes a control file and gets the similarity 675 of the suites in the control file and the given name. 676 677 Builds a predicate that takes in a parsed control file (a ControlData) 678 and returns a list of tuples of (suite name, ratio), where suite name 679 is each suite listed in the control file, and ratio is the similarity 680 between each suite and the given name. 681 682 @param name: the suite name to base the predicate on. 683 @return a callable that takes a ControlData and returns a list of tuples 684 of (suite name, ratio), where suite name is each suite listed in 685 the control file, and ratio is the similarity between each suite 686 and the given name. 687 """ 688 return lambda t: [(suite, 689 difflib.SequenceMatcher(a=suite, b=name).ratio()) 690 for suite in t.suite_tag_parts] or [(None, 0)] 691 692 693def name_in_tag_predicate(name): 694 """Returns predicate that takes a control file and looks for |name|. 695 696 Builds a predicate that takes in a parsed control file (a ControlData) 697 and returns True if the SUITE tag is present and contains |name|. 698 699 @param name: the suite name to base the predicate on. 700 @return a callable that takes a ControlData and looks for |name| in that 701 ControlData object's suite member. 702 """ 703 return suite_common.name_in_tag_predicate(name) 704 705 706def create_fs_getter(autotest_dir): 707 """ 708 @param autotest_dir: the place to find autotests. 709 @return a FileSystemGetter instance that looks under |autotest_dir|. 710 """ 711 # currently hard-coded places to look for tests. 712 subpaths = ['server/site_tests', 'client/site_tests', 713 'server/tests', 'client/tests'] 714 directories = [os.path.join(autotest_dir, p) for p in subpaths] 715 return control_file_getter.FileSystemGetter(directories) 716 717 718def _create_ds_getter(build, devserver): 719 """ 720 @param build: the build on which we're running this suite. 721 @param devserver: the devserver which contains the build. 722 @return a FileSystemGetter instance that looks under |autotest_dir|. 723 """ 724 return control_file_getter.DevServerGetter(build, devserver) 725 726 727def _non_experimental_tests_predicate(test_data): 728 """Test predicate for non-experimental tests.""" 729 return not test_data.experimental 730 731 732def find_and_parse_tests(cf_getter, predicate, suite_name='', 733 add_experimental=False, forgiving_parser=True, 734 run_prod_code=False, test_args=None): 735 """ 736 Function to scan through all tests and find eligible tests. 737 738 Search through all tests based on given cf_getter, suite_name, 739 add_experimental and forgiving_parser, return the tests that match 740 given predicate. 741 742 @param cf_getter: a control_file_getter.ControlFileGetter used to list 743 and fetch the content of control files 744 @param predicate: a function that should return True when run over a 745 ControlData representation of a control file that should be in 746 this Suite. 747 @param suite_name: If specified, this method will attempt to restrain 748 the search space to just this suite's control files. 749 @param add_experimental: add tests with experimental attribute set. 750 @param forgiving_parser: If False, will raise ControlVariableExceptions 751 if any are encountered when parsing control 752 files. Note that this can raise an exception 753 for syntax errors in unrelated files, because 754 we parse them before applying the predicate. 755 @param run_prod_code: If true, the suite will run the test code that 756 lives in prod aka the test code currently on the 757 lab servers by disabling SSP for the discovered 758 tests. 759 @param test_args: A dict of args to be seeded in test control file. 760 761 @raises ControlVariableException: If forgiving_parser is False and there 762 is a syntax error in a control file. 763 764 @return list of ControlData objects that should be run, with control 765 file text added in |text| attribute. Results are sorted based 766 on the TIME setting in control file, slowest test comes first. 767 """ 768 logging.debug('Getting control file list for suite: %s', suite_name) 769 retriever = _ControlFileRetriever(cf_getter, 770 forgiving_parser=forgiving_parser, 771 run_prod_code=run_prod_code, 772 test_args=test_args) 773 tests = retriever.retrieve_for_suite(suite_name) 774 if not add_experimental: 775 predicate = _ComposedPredicate([predicate, 776 _non_experimental_tests_predicate]) 777 return suite_common.filter_tests(tests, predicate) 778 779 780def find_possible_tests(cf_getter, predicate, suite_name='', count=10): 781 """ 782 Function to scan through all tests and find possible tests. 783 784 Search through all tests based on given cf_getter, suite_name, 785 add_experimental and forgiving_parser. Use the given predicate to 786 calculate the similarity and return the top 10 matches. 787 788 @param cf_getter: a control_file_getter.ControlFileGetter used to list 789 and fetch the content of control files 790 @param predicate: a function that should return a tuple of (name, ratio) 791 when run over a ControlData representation of a control file that 792 should be in this Suite. `name` is the key to be compared, e.g., 793 a suite name or test name. `ratio` is a value between [0,1] 794 indicating the similarity of `name` and the value to be compared. 795 @param suite_name: If specified, this method will attempt to restrain 796 the search space to just this suite's control files. 797 @param count: Number of suggestions to return, default to 10. 798 799 @return list of top names that similar to the given test, sorted by 800 match ratio. 801 """ 802 logging.debug('Getting control file list for suite: %s', suite_name) 803 tests = _ControlFileRetriever(cf_getter).retrieve_for_suite(suite_name) 804 logging.debug('Parsed %s control files.', len(tests)) 805 similarities = {} 806 for test in tests.itervalues(): 807 ratios = predicate(test) 808 # Some predicates may return a list of tuples, e.g., 809 # name_in_tag_similarity_predicate. Convert all returns to a list. 810 if not isinstance(ratios, list): 811 ratios = [ratios] 812 for name, ratio in ratios: 813 similarities[name] = ratio 814 return [s[0] for s in 815 sorted(similarities.items(), key=operator.itemgetter(1), 816 reverse=True)][:count] 817 818 819def _deprecated_suite_method(func): 820 """Decorator for deprecated Suite static methods. 821 822 TODO(ayatane): This is used to decorate functions that are called as 823 static methods on Suite. 824 """ 825 @functools.wraps(func) 826 def wrapper(*args, **kwargs): 827 """Wraps |func| for warning.""" 828 warnings.warn('Calling method "%s" from Suite is deprecated' % 829 func.__name__) 830 return func(*args, **kwargs) 831 return staticmethod(wrapper) 832 833 834class _BaseSuite(object): 835 """ 836 A suite of tests, defined by some predicate over control file variables. 837 838 Given a place to search for control files a predicate to match the desired 839 tests, can gather tests and fire off jobs to run them, and then wait for 840 results. 841 842 @var _predicate: a function that should return True when run over a 843 ControlData representation of a control file that should be in 844 this Suite. 845 @var _tag: a string with which to tag jobs run in this suite. 846 @var _builds: the builds on which we're running this suite. 847 @var _afe: an instance of AFE as defined in server/frontend.py. 848 @var _tko: an instance of TKO as defined in server/frontend.py. 849 @var _jobs: currently scheduled jobs, if any. 850 @var _jobs_to_tests: a dictionary that maps job ids to tests represented 851 ControlData objects. 852 @var _retry: a bool value indicating whether jobs should be retried on 853 failure. 854 @var _retry_handler: a RetryHandler object. 855 856 """ 857 858 859 def __init__( 860 self, 861 tests, 862 tag, 863 builds, 864 board, 865 afe=None, 866 tko=None, 867 pool=None, 868 results_dir=None, 869 max_runtime_mins=24*60, 870 timeout_mins=24*60, 871 file_bugs=False, 872 suite_job_id=None, 873 ignore_deps=False, 874 extra_deps=None, 875 priority=priorities.Priority.DEFAULT, 876 wait_for_results=True, 877 job_retry=False, 878 max_retries=sys.maxint, 879 offload_failures_only=False, 880 test_source_build=None, 881 job_keyvals=None, 882 child_dependencies=(), 883 result_reporter=None, 884 ): 885 """Initialize instance. 886 887 @param tests: Iterable of tests to run. 888 @param tag: a string with which to tag jobs run in this suite. 889 @param builds: the builds on which we're running this suite. 890 @param board: the board on which we're running this suite. 891 @param afe: an instance of AFE as defined in server/frontend.py. 892 @param tko: an instance of TKO as defined in server/frontend.py. 893 @param pool: Specify the pool of machines to use for scheduling 894 purposes. 895 @param results_dir: The directory where the job can write results to. 896 This must be set if you want job_id of sub-jobs 897 list in the job keyvals. 898 @param max_runtime_mins: Maximum suite runtime, in minutes. 899 @param timeout: Maximum job lifetime, in hours. 900 @param suite_job_id: Job id that will act as parent id to all sub jobs. 901 Default: None 902 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 903 attribute and skip applying of dependency labels. 904 (Default:False) 905 @param extra_deps: A list of strings which are the extra DEPENDENCIES 906 to add to each test being scheduled. 907 @param priority: Integer priority level. Higher is more important. 908 @param wait_for_results: Set to False to run the suite job without 909 waiting for test jobs to finish. Default is 910 True. 911 @param job_retry: A bool value indicating whether jobs should be retried 912 on failure. If True, the field 'JOB_RETRIES' in 913 control files will be respected. If False, do not 914 retry. 915 @param max_retries: Maximum retry limit at suite level. 916 Regardless how many times each individual test 917 has been retried, the total number of retries 918 happening in the suite can't exceed _max_retries. 919 Default to sys.maxint. 920 @param offload_failures_only: Only enable gs_offloading for failed 921 jobs. 922 @param test_source_build: Build that contains the server-side test code. 923 @param job_keyvals: General job keyvals to be inserted into keyval file, 924 which will be used by tko/parse later. 925 @param child_dependencies: (optional) list of dependency strings 926 to be added as dependencies to child jobs. 927 @param result_reporter: A _ResultReporter instance to report results. If 928 None, an _EmailReporter will be created. 929 """ 930 931 self.tests = list(tests) 932 self._tag = tag 933 self._builds = builds 934 self._results_dir = results_dir 935 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30, 936 delay_sec=10, 937 debug=False) 938 self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30, 939 delay_sec=10, 940 debug=False) 941 self._jobs = [] 942 self._jobs_to_tests = {} 943 944 self._file_bugs = file_bugs 945 self._suite_job_id = suite_job_id 946 self._job_retry=job_retry 947 self._max_retries = max_retries 948 # RetryHandler to be initialized in schedule() 949 self._retry_handler = None 950 self.wait_for_results = wait_for_results 951 self._job_keyvals = job_keyvals 952 if result_reporter is None: 953 self._result_reporter = _EmailReporter(self) 954 else: 955 self._result_reporter = result_reporter 956 957 if extra_deps is None: 958 extra_deps = [] 959 extra_deps.append(board) 960 if pool: 961 extra_deps.append(pool) 962 extra_deps.extend(child_dependencies) 963 self._dependencies = tuple(extra_deps) 964 965 self._job_creator = _SuiteChildJobCreator( 966 tag=tag, 967 builds=builds, 968 board=board, 969 afe=afe, 970 max_runtime_mins=max_runtime_mins, 971 timeout_mins=timeout_mins, 972 suite_job_id=suite_job_id, 973 ignore_deps=ignore_deps, 974 extra_deps=extra_deps, 975 priority=priority, 976 offload_failures_only=offload_failures_only, 977 test_source_build=test_source_build, 978 job_keyvals=job_keyvals, 979 ) 980 981 982 def _schedule_test(self, record, test, retry_for=None): 983 """Schedule a single test and return the job. 984 985 Schedule a single test by creating a job, and then update relevant 986 data structures that are used to keep track of all running jobs. 987 988 Emits a TEST_NA status log entry if it failed to schedule the test due 989 to NoEligibleHostException or a non-existent board label. 990 991 Returns a frontend.Job object if the test is successfully scheduled. 992 If scheduling failed due to NoEligibleHostException or a non-existent 993 board label, returns None. 994 995 @param record: A callable to use for logging. 996 prototype: record(base_job.status_log_entry) 997 @param test: ControlData for a test to run. 998 @param retry_for: If we are scheduling a test to retry an 999 old job, the afe_job_id of the old job 1000 will be passed in as |retry_for|. 1001 1002 @returns: A frontend.Job object or None 1003 """ 1004 msg = 'Scheduling %s' % test.name 1005 if retry_for: 1006 msg = msg + ', to retry afe job %d' % retry_for 1007 logging.debug(msg) 1008 begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT) 1009 try: 1010 job = self._job_creator.create_job(test, retry_for=retry_for) 1011 except (error.NoEligibleHostException, proxy.ValidationError) as e: 1012 if (isinstance(e, error.NoEligibleHostException) 1013 or (isinstance(e, proxy.ValidationError) 1014 and _is_nonexistent_board_error(e))): 1015 # Treat a dependency on a non-existent board label the same as 1016 # a dependency on a board that exists, but for which there's no 1017 # hardware. 1018 logging.debug('%s not applicable for this board/pool. ' 1019 'Emitting TEST_NA.', test.name) 1020 Status('TEST_NA', test.name, 1021 'Skipping: test not supported on this board/pool.', 1022 begin_time_str=begin_time_str).record_all(record) 1023 return None 1024 else: 1025 raise e 1026 except (error.RPCException, proxy.JSONRPCException): 1027 if retry_for: 1028 # Mark that we've attempted to retry the old job. 1029 logging.debug("RPC exception occurred") 1030 self._retry_handler.set_attempted(job_id=retry_for) 1031 raise 1032 else: 1033 self._jobs.append(job) 1034 self._jobs_to_tests[job.id] = test 1035 if retry_for: 1036 # A retry job was just created, record it. 1037 self._retry_handler.add_retry( 1038 old_job_id=retry_for, new_job_id=job.id) 1039 retry_count = (test.job_retries - 1040 self._retry_handler.get_retry_max(job.id)) 1041 logging.debug('Job %d created to retry job %d. ' 1042 'Have retried for %d time(s)', 1043 job.id, retry_for, retry_count) 1044 self._remember_job_keyval(job) 1045 return job 1046 1047 def schedule(self, record): 1048 """ 1049 Schedule jobs using |self._afe|. 1050 1051 frontend.Job objects representing each scheduled job will be put in 1052 |self._jobs|. 1053 1054 @param record: A callable to use for logging. 1055 prototype: record(base_job.status_log_entry) 1056 @returns: The number of tests that were scheduled. 1057 """ 1058 scheduled_test_names = [] 1059 logging.debug('Discovered %d tests.', len(self.tests)) 1060 1061 Status('INFO', 'Start %s' % self._tag).record_result(record) 1062 try: 1063 # Write job_keyvals into keyval file. 1064 if self._job_keyvals: 1065 utils.write_keyval(self._results_dir, self._job_keyvals) 1066 1067 # TODO(crbug.com/730885): This is a hack to protect tests that are 1068 # not usually retried from getting hit by a provision error when run 1069 # as part of a suite. Remove this hack once provision is separated 1070 # out in its own suite. 1071 self._bump_up_test_retries(self.tests) 1072 for test in self.tests: 1073 scheduled_job = self._schedule_test(record, test) 1074 if scheduled_job is not None: 1075 scheduled_test_names.append(test.name) 1076 1077 # Write the num of scheduled tests and name of them to keyval file. 1078 logging.debug('Scheduled %d tests, writing the total to keyval.', 1079 len(scheduled_test_names)) 1080 utils.write_keyval( 1081 self._results_dir, 1082 self._make_scheduled_tests_keyvals(scheduled_test_names)) 1083 except Exception: 1084 logging.exception('Exception while scheduling suite') 1085 Status('FAIL', self._tag, 1086 'Exception while scheduling suite').record_result(record) 1087 1088 if self._job_retry: 1089 logging.debug("Initializing RetryHandler for suite %s.", self._tag) 1090 self._retry_handler = RetryHandler( 1091 initial_jobs_to_tests=self._jobs_to_tests, 1092 max_retries=self._max_retries) 1093 logging.debug("retry map created: %s ", 1094 self._retry_handler._retry_map) 1095 else: 1096 logging.info("Will not retry jobs from suite %s.", self._tag) 1097 return len(scheduled_test_names) 1098 1099 1100 def _bump_up_test_retries(self, tests): 1101 """Bump up individual test retries to match suite retry options.""" 1102 if not self._job_retry: 1103 return 1104 1105 for test in tests: 1106 # We do honor if a test insists on JOB_RETRIES = 0. 1107 if test.job_retries is None: 1108 logging.debug( 1109 'Test %s did not request retries, but suite requires ' 1110 'retries. Bumping retries up to 1. ' 1111 '(See crbug.com/730885)', 1112 test.name) 1113 test.job_retries = 1 1114 1115 1116 def _make_scheduled_tests_keyvals(self, scheduled_test_names): 1117 """Make a keyvals dict to write for scheduled test names. 1118 1119 @param scheduled_test_names: A list of scheduled test name strings. 1120 1121 @returns: A keyvals dict. 1122 """ 1123 return { 1124 constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names), 1125 constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names), 1126 } 1127 1128 1129 def _should_report(self, result): 1130 """ 1131 Returns True if this failure requires to be reported. 1132 1133 @param result: A result, encapsulating the status of the failed job. 1134 @return: True if we should report this failure. 1135 """ 1136 return (self._file_bugs and result.test_executed and 1137 not result.is_testna() and 1138 result.is_worse_than(job_status.Status('GOOD', '', 'reason'))) 1139 1140 1141 def _has_retry(self, result): 1142 """ 1143 Return True if this result gets to retry. 1144 1145 @param result: A result, encapsulating the status of the failed job. 1146 @return: bool 1147 """ 1148 return (self._job_retry 1149 and self._retry_handler.has_following_retry(result)) 1150 1151 1152 def wait(self, record): 1153 """ 1154 Polls for the job statuses, using |record| to print status when each 1155 completes. 1156 1157 @param record: callable that records job status. 1158 prototype: 1159 record(base_job.status_log_entry) 1160 """ 1161 waiter = job_status.JobResultWaiter(self._afe, self._tko) 1162 try: 1163 if self._suite_job_id: 1164 jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id) 1165 else: 1166 logging.warning('Unknown suite_job_id, falling back to less ' 1167 'efficient results_generator.') 1168 jobs = self._jobs 1169 waiter.add_jobs(jobs) 1170 for result in waiter.wait_for_results(): 1171 self._handle_result(result=result, record=record, waiter=waiter) 1172 if self._finished_waiting(): 1173 break 1174 except Exception: # pylint: disable=W0703 1175 logging.exception('Exception waiting for results') 1176 Status('FAIL', self._tag, 1177 'Exception waiting for results').record_result(record) 1178 1179 1180 def _finished_waiting(self): 1181 """Return whether the suite is finished waiting for child jobs.""" 1182 return False 1183 1184 1185 def _handle_result(self, result, record, waiter): 1186 """ 1187 Handle a test job result. 1188 1189 @param result: Status instance for job. 1190 @param record: callable that records job status. 1191 prototype: 1192 record(base_job.status_log_entry) 1193 @param waiter: JobResultsWaiter instance. 1194 1195 @instance_param _result_reporter: _ResultReporter instance. 1196 """ 1197 self._record_result(result, record) 1198 rescheduled = False 1199 if self._job_retry and self._retry_handler._should_retry(result): 1200 rescheduled = self._retry_result(result, record, waiter) 1201 # TODO (crbug.com/751428): If the suite times out before a retry could 1202 # finish, we would lose the chance to report errors from the original 1203 # job. 1204 if self._has_retry(result) and rescheduled: 1205 return 1206 1207 if self._should_report(result): 1208 self._result_reporter.report(result) 1209 1210 def _record_result(self, result, record): 1211 """ 1212 Record a test job result. 1213 1214 @param result: Status instance for job. 1215 @param record: callable that records job status. 1216 prototype: 1217 record(base_job.status_log_entry) 1218 """ 1219 result.record_all(record) 1220 self._remember_job_keyval(result) 1221 1222 1223 def _retry_result(self, result, record, waiter): 1224 """ 1225 Retry a test job result. 1226 1227 @param result: Status instance for job. 1228 @param record: callable that records job status. 1229 prototype: 1230 record(base_job.status_log_entry) 1231 @param waiter: JobResultsWaiter instance. 1232 @returns: True if a job was scheduled for retry, False otherwise. 1233 """ 1234 test = self._jobs_to_tests[result.id] 1235 try: 1236 # It only takes effect for CQ retriable job: 1237 # 1) in first try, test.fast=True. 1238 # 2) in second try, test will be run in normal mode, so reset 1239 # test.fast=False. 1240 test.fast = False 1241 new_job = self._schedule_test( 1242 record=record, test=test, retry_for=result.id) 1243 except (error.RPCException, proxy.JSONRPCException) as e: 1244 logging.error('Failed to schedule test: %s, Reason: %s', 1245 test.name, e) 1246 return False 1247 else: 1248 waiter.add_job(new_job) 1249 return bool(new_job) 1250 1251 @property 1252 def jobs(self): 1253 """Give a copy of the associated jobs 1254 1255 @returns: array of jobs""" 1256 return [job for job in self._jobs] 1257 1258 1259 @property 1260 def _should_file_bugs(self): 1261 """Return whether bugs should be filed. 1262 1263 @returns: bool 1264 """ 1265 # File bug when failure is one of the _FILE_BUG_SUITES, 1266 # otherwise send an email to the owner anc cc. 1267 return self._tag in _FILE_BUG_SUITES 1268 1269 1270 def abort(self): 1271 """ 1272 Abort all scheduled test jobs. 1273 """ 1274 if self._jobs: 1275 job_ids = [job.id for job in self._jobs] 1276 self._afe.run('abort_host_queue_entries', job__id__in=job_ids) 1277 1278 1279 def _remember_job_keyval(self, job): 1280 """ 1281 Record provided job as a suite job keyval, for later referencing. 1282 1283 @param job: some representation of a job that has the attributes: 1284 id, test_name, and owner 1285 """ 1286 if self._results_dir and job.id and job.owner and job.test_name: 1287 job_id_owner = '%s-%s' % (job.id, job.owner) 1288 logging.debug('Adding job keyval for %s=%s', 1289 job.test_name, job_id_owner) 1290 utils.write_keyval( 1291 self._results_dir, 1292 {hashlib.md5(job.test_name).hexdigest(): job_id_owner}) 1293 1294 1295class Suite(_BaseSuite): 1296 """ 1297 A suite of tests, defined by some predicate over control file variables. 1298 1299 Given a place to search for control files a predicate to match the desired 1300 tests, can gather tests and fire off jobs to run them, and then wait for 1301 results. 1302 1303 @var _predicate: a function that should return True when run over a 1304 ControlData representation of a control file that should be in 1305 this Suite. 1306 @var _tag: a string with which to tag jobs run in this suite. 1307 @var _builds: the builds on which we're running this suite. 1308 @var _afe: an instance of AFE as defined in server/frontend.py. 1309 @var _tko: an instance of TKO as defined in server/frontend.py. 1310 @var _jobs: currently scheduled jobs, if any. 1311 @var _jobs_to_tests: a dictionary that maps job ids to tests represented 1312 ControlData objects. 1313 @var _cf_getter: a control_file_getter.ControlFileGetter 1314 @var _retry: a bool value indicating whether jobs should be retried on 1315 failure. 1316 @var _retry_handler: a RetryHandler object. 1317 1318 """ 1319 1320 # TODO(ayatane): These methods are kept on the Suite class for 1321 # backward compatibility. 1322 find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests) 1323 find_possible_tests = _deprecated_suite_method(find_possible_tests) 1324 create_fs_getter = _deprecated_suite_method(create_fs_getter) 1325 name_in_tag_predicate = _deprecated_suite_method( 1326 suite_common.name_in_tag_predicate) 1327 name_in_tag_similarity_predicate = _deprecated_suite_method( 1328 name_in_tag_similarity_predicate) 1329 test_name_equals_predicate = _deprecated_suite_method( 1330 test_name_equals_predicate) 1331 test_name_matches_pattern_predicate = _deprecated_suite_method( 1332 test_name_matches_pattern_predicate) 1333 test_file_matches_pattern_predicate = _deprecated_suite_method( 1334 test_file_matches_pattern_predicate) 1335 matches_attribute_expression_predicate = _deprecated_suite_method( 1336 matches_attribute_expression_predicate) 1337 test_name_similarity_predicate = _deprecated_suite_method( 1338 test_name_similarity_predicate) 1339 test_file_similarity_predicate = _deprecated_suite_method( 1340 test_file_similarity_predicate) 1341 list_all_suites = _deprecated_suite_method(list_all_suites) 1342 get_test_source_build = _deprecated_suite_method( 1343 suite_common.get_test_source_build) 1344 1345 1346 @classmethod 1347 def create_from_predicates(cls, predicates, builds, board, devserver, 1348 cf_getter=None, name='ad_hoc_suite', 1349 run_prod_code=False, **dargs): 1350 """ 1351 Create a Suite using a given predicate test filters. 1352 1353 Uses supplied predicate(s) to instantiate a Suite. Looks for tests in 1354 |autotest_dir| and will schedule them using |afe|. Pulls control files 1355 from the default dev server. Results will be pulled from |tko| upon 1356 completion. 1357 1358 @param predicates: A list of callables that accept ControlData 1359 representations of control files. A test will be 1360 included in suite if all callables in this list 1361 return True on the given control file. 1362 @param builds: the builds on which we're running this suite. It's a 1363 dictionary of version_prefix:build. 1364 @param board: the board on which we're running this suite. 1365 @param devserver: the devserver which contains the build. 1366 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 1367 using DevServerGetter. 1368 @param name: name of suite. Defaults to 'ad_hoc_suite' 1369 @param run_prod_code: If true, the suite will run the tests that 1370 lives in prod aka the test code currently on the 1371 lab servers. 1372 @param **dargs: Any other Suite constructor parameters, as described 1373 in Suite.__init__ docstring. 1374 @return a Suite instance. 1375 """ 1376 if cf_getter is None: 1377 if run_prod_code: 1378 cf_getter = create_fs_getter(_AUTOTEST_DIR) 1379 else: 1380 build = suite_common.get_test_source_build(builds, **dargs) 1381 cf_getter = _create_ds_getter(build, devserver) 1382 1383 return cls(predicates, 1384 name, builds, board, cf_getter, run_prod_code, **dargs) 1385 1386 1387 @classmethod 1388 def create_from_name(cls, name, builds, board, devserver, cf_getter=None, 1389 **dargs): 1390 """ 1391 Create a Suite using a predicate based on the SUITE control file var. 1392 1393 Makes a predicate based on |name| and uses it to instantiate a Suite 1394 that looks for tests in |autotest_dir| and will schedule them using 1395 |afe|. Pulls control files from the default dev server. 1396 Results will be pulled from |tko| upon completion. 1397 1398 @param name: a value of the SUITE control file variable to search for. 1399 @param builds: the builds on which we're running this suite. It's a 1400 dictionary of version_prefix:build. 1401 @param board: the board on which we're running this suite. 1402 @param devserver: the devserver which contains the build. 1403 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 1404 using DevServerGetter. 1405 @param **dargs: Any other Suite constructor parameters, as described 1406 in Suite.__init__ docstring. 1407 @return a Suite instance. 1408 """ 1409 if cf_getter is None: 1410 build = suite_common.get_test_source_build(builds, **dargs) 1411 cf_getter = _create_ds_getter(build, devserver) 1412 1413 return cls([suite_common.name_in_tag_predicate(name)], 1414 name, builds, board, cf_getter, **dargs) 1415 1416 1417 def __init__( 1418 self, 1419 predicates, 1420 tag, 1421 builds, 1422 board, 1423 cf_getter, 1424 run_prod_code=False, 1425 afe=None, 1426 tko=None, 1427 pool=None, 1428 results_dir=None, 1429 max_runtime_mins=24*60, 1430 timeout_mins=24*60, 1431 file_bugs=False, 1432 suite_job_id=None, 1433 ignore_deps=False, 1434 extra_deps=None, 1435 priority=priorities.Priority.DEFAULT, 1436 forgiving_parser=True, 1437 wait_for_results=True, 1438 job_retry=False, 1439 max_retries=sys.maxint, 1440 offload_failures_only=False, 1441 test_source_build=None, 1442 job_keyvals=None, 1443 test_args=None, 1444 child_dependencies=(), 1445 result_reporter=None, 1446 ): 1447 """ 1448 Constructor 1449 1450 @param predicates: A list of callables that accept ControlData 1451 representations of control files. A test will be 1452 included in suite if all callables in this list 1453 return True on the given control file. 1454 @param tag: a string with which to tag jobs run in this suite. 1455 @param builds: the builds on which we're running this suite. 1456 @param board: the board on which we're running this suite. 1457 @param cf_getter: a control_file_getter.ControlFileGetter 1458 @param afe: an instance of AFE as defined in server/frontend.py. 1459 @param tko: an instance of TKO as defined in server/frontend.py. 1460 @param pool: Specify the pool of machines to use for scheduling 1461 purposes. 1462 @param run_prod_code: If true, the suite will run the test code that 1463 lives in prod aka the test code currently on the 1464 lab servers. 1465 @param results_dir: The directory where the job can write results to. 1466 This must be set if you want job_id of sub-jobs 1467 list in the job keyvals. 1468 @param max_runtime_mins: Maximum suite runtime, in minutes. 1469 @param timeout: Maximum job lifetime, in hours. 1470 @param suite_job_id: Job id that will act as parent id to all sub jobs. 1471 Default: None 1472 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 1473 attribute and skip applying of dependency labels. 1474 (Default:False) 1475 @param extra_deps: A list of strings which are the extra DEPENDENCIES 1476 to add to each test being scheduled. 1477 @param priority: Integer priority level. Higher is more important. 1478 @param wait_for_results: Set to False to run the suite job without 1479 waiting for test jobs to finish. Default is 1480 True. 1481 @param job_retry: A bool value indicating whether jobs should be retried 1482 on failure. If True, the field 'JOB_RETRIES' in 1483 control files will be respected. If False, do not 1484 retry. 1485 @param max_retries: Maximum retry limit at suite level. 1486 Regardless how many times each individual test 1487 has been retried, the total number of retries 1488 happening in the suite can't exceed _max_retries. 1489 Default to sys.maxint. 1490 @param offload_failures_only: Only enable gs_offloading for failed 1491 jobs. 1492 @param test_source_build: Build that contains the server-side test code. 1493 @param job_keyvals: General job keyvals to be inserted into keyval file, 1494 which will be used by tko/parse later. 1495 @param test_args: A dict of args passed all the way to each individual 1496 test that will be actually ran. 1497 @param child_dependencies: (optional) list of dependency strings 1498 to be added as dependencies to child jobs. 1499 @param result_reporter: A _ResultReporter instance to report results. If 1500 None, an _EmailReporter will be created. 1501 """ 1502 tests = find_and_parse_tests( 1503 cf_getter, 1504 _ComposedPredicate(predicates), 1505 tag, 1506 forgiving_parser=forgiving_parser, 1507 run_prod_code=run_prod_code, 1508 test_args=test_args, 1509 ) 1510 super(Suite, self).__init__( 1511 tests=tests, 1512 tag=tag, 1513 builds=builds, 1514 board=board, 1515 afe=afe, 1516 tko=tko, 1517 pool=pool, 1518 results_dir=results_dir, 1519 max_runtime_mins=max_runtime_mins, 1520 timeout_mins=timeout_mins, 1521 file_bugs=file_bugs, 1522 suite_job_id=suite_job_id, 1523 ignore_deps=ignore_deps, 1524 extra_deps=extra_deps, 1525 priority=priority, 1526 wait_for_results=wait_for_results, 1527 job_retry=job_retry, 1528 max_retries=max_retries, 1529 offload_failures_only=offload_failures_only, 1530 test_source_build=test_source_build, 1531 job_keyvals=job_keyvals, 1532 child_dependencies=child_dependencies, 1533 result_reporter=result_reporter, 1534 ) 1535 1536 1537class ProvisionSuite(_BaseSuite): 1538 """ 1539 A suite for provisioning DUTs. 1540 1541 This is done by creating dummy_Pass tests. 1542 """ 1543 1544 1545 def __init__( 1546 self, 1547 tag, 1548 builds, 1549 board, 1550 devserver, 1551 num_required, 1552 num_max=float('inf'), 1553 cf_getter=None, 1554 run_prod_code=False, 1555 test_args=None, 1556 test_source_build=None, 1557 **kwargs): 1558 """ 1559 Constructor 1560 1561 @param tag: a string with which to tag jobs run in this suite. 1562 @param builds: the builds on which we're running this suite. 1563 @param board: the board on which we're running this suite. 1564 @param devserver: the devserver which contains the build. 1565 @param num_required: number of tests that must pass. This is 1566 capped by the number of tests that are run. 1567 @param num_max: max number of tests to make. By default there 1568 is no cap, a test is created for each eligible host. 1569 @param cf_getter: a control_file_getter.ControlFileGetter. 1570 @param test_args: A dict of args passed all the way to each individual 1571 test that will be actually ran. 1572 @param test_source_build: Build that contains the server-side test code. 1573 @param kwargs: Various keyword arguments passed to 1574 _BaseSuite constructor. 1575 """ 1576 super(ProvisionSuite, self).__init__( 1577 tests=[], 1578 tag=tag, 1579 builds=builds, 1580 board=board, 1581 **kwargs) 1582 self._num_successful = 0 1583 self._num_required = 0 1584 self.tests = [] 1585 1586 static_deps = [dep for dep in self._dependencies 1587 if not provision.Provision.acts_on(dep)] 1588 if 'pool:suites' in static_deps: 1589 logging.info('Provision suite is disabled on suites pool') 1590 return 1591 logging.debug('Looking for hosts matching %r', static_deps) 1592 hosts = self._afe.get_hosts( 1593 invalid=False, multiple_labels=static_deps) 1594 logging.debug('Found %d matching hosts for ProvisionSuite', len(hosts)) 1595 available_hosts = [h for h in hosts if h.is_available()] 1596 logging.debug('Found %d available hosts for ProvisionSuite', 1597 len(available_hosts)) 1598 dummy_test = _load_dummy_test( 1599 builds, devserver, cf_getter, 1600 run_prod_code, test_args, test_source_build) 1601 self.tests = [dummy_test] * min(len(available_hosts), num_max) 1602 logging.debug('Made %d tests for ProvisionSuite', len(self.tests)) 1603 self._num_required = min(num_required, len(self.tests)) 1604 logging.debug('Expecting %d tests to pass for ProvisionSuite', 1605 self._num_required) 1606 1607 def _handle_result(self, result, record, waiter): 1608 super(ProvisionSuite, self)._handle_result(result, record, waiter) 1609 if result.is_good(): 1610 self._num_successful += 1 1611 1612 def _finished_waiting(self): 1613 return self._num_successful >= self._num_required 1614 1615 1616def _load_dummy_test( 1617 builds, 1618 devserver, 1619 cf_getter=None, 1620 run_prod_code=False, 1621 test_args=None, 1622 test_source_build=None): 1623 """ 1624 Load and return the dummy pass test. 1625 1626 @param builds: the builds on which we're running this suite. 1627 @param devserver: the devserver which contains the build. 1628 @param cf_getter: a control_file_getter.ControlFileGetter. 1629 @param test_args: A dict of args passed all the way to each individual 1630 test that will be actually ran. 1631 @param test_source_build: Build that contains the server-side test code. 1632 """ 1633 if cf_getter is None: 1634 if run_prod_code: 1635 cf_getter = create_fs_getter(_AUTOTEST_DIR) 1636 else: 1637 build = suite_common.get_test_source_build( 1638 builds, test_source_build=test_source_build) 1639 devserver.stage_artifacts(image=build, 1640 artifacts=['control_files']) 1641 cf_getter = _create_ds_getter(build, devserver) 1642 retriever = _ControlFileRetriever(cf_getter, 1643 run_prod_code=run_prod_code, 1644 test_args=test_args) 1645 return retriever.retrieve_for_test('dummy_Pass') 1646 1647 1648class _ComposedPredicate(object): 1649 """Return the composition of the predicates. 1650 1651 Predicates are functions that take a test control data object and 1652 return True of that test is to be included. The returned 1653 predicate's set is the intersection of all of the input predicates' 1654 sets (it returns True if all predicates return True). 1655 """ 1656 1657 def __init__(self, predicates): 1658 """Initialize instance. 1659 1660 @param predicates: Iterable of predicates. 1661 """ 1662 self._predicates = list(predicates) 1663 1664 def __repr__(self): 1665 return '{cls}({this._predicates!r})'.format( 1666 cls=type(self).__name__, 1667 this=self, 1668 ) 1669 1670 def __call__(self, control_data_): 1671 return all(f(control_data_) for f in self._predicates) 1672 1673 1674def _is_nonexistent_board_error(e): 1675 """Return True if error is caused by nonexistent board label. 1676 1677 As of this writing, the particular case we want looks like this: 1678 1679 1) e.problem_keys is a dictionary 1680 2) e.problem_keys['meta_hosts'] exists as the only key 1681 in the dictionary. 1682 3) e.problem_keys['meta_hosts'] matches this pattern: 1683 "Label "board:.*" not found" 1684 1685 We check for conditions 1) and 2) on the 1686 theory that they're relatively immutable. 1687 We don't check condition 3) because it seems 1688 likely to be a maintenance burden, and for the 1689 times when we're wrong, being right shouldn't 1690 matter enough (we _hope_). 1691 1692 @param e: proxy.ValidationError instance 1693 @returns: boolean 1694 """ 1695 return (isinstance(e.problem_keys, dict) 1696 and len(e.problem_keys) == 1 1697 and 'meta_hosts' in e.problem_keys) 1698 1699 1700class _ResultReporter(object): 1701 """Abstract base class for reporting test results. 1702 1703 Usually, this is used to report test failures. 1704 """ 1705 1706 __metaclass__ = abc.ABCMeta 1707 1708 @abc.abstractmethod 1709 def report(self, result): 1710 """Report test result. 1711 1712 @param result: Status instance for job. 1713 """ 1714 1715 1716class _EmailReporter(_ResultReporter): 1717 """Class that emails based on test failures.""" 1718 1719 # TODO(akeshet): Document what |bug_template| is actually supposed to come 1720 # from, and rename it to something unrelated to "bugs" which are no longer 1721 # relevant now that this is purely an email sender. 1722 def __init__(self, suite, bug_template=None): 1723 self._suite = suite 1724 self._bug_template = bug_template or {} 1725 1726 def _get_test_bug(self, result): 1727 """Get TestBug for the given result. 1728 1729 @param result: Status instance for a test job. 1730 @returns: TestBug instance. 1731 """ 1732 # reporting modules have dependency on external packages, e.g., httplib2 1733 # Such dependency can cause issue to any module tries to import suite.py 1734 # without building site-packages first. Since the reporting modules are 1735 # only used in this function, move the imports here avoid the 1736 # requirement of building site packages to use other functions in this 1737 # module. 1738 from autotest_lib.server.cros.dynamic_suite import reporting 1739 1740 job_views = self._suite._tko.run('get_detailed_test_views', 1741 afe_job_id=result.id) 1742 return reporting.TestBug(self._suite._job_creator.cros_build, 1743 utils.get_chrome_version(job_views), 1744 self._suite._tag, 1745 result) 1746 1747 def _get_bug_template(self, result): 1748 """Get BugTemplate for test job. 1749 1750 @param result: Status instance for job. 1751 @param bug_template: A template dictionary specifying the default bug 1752 filing options for failures in this suite. 1753 @returns: BugTemplate instance 1754 """ 1755 # reporting modules have dependency on external packages, e.g., httplib2 1756 # Such dependency can cause issue to any module tries to import suite.py 1757 # without building site-packages first. Since the reporting modules are 1758 # only used in this function, move the imports here avoid the 1759 # requirement of building site packages to use other functions in this 1760 # module. 1761 from autotest_lib.server.cros.dynamic_suite import reporting_utils 1762 1763 # Try to merge with bug template in test control file. 1764 template = reporting_utils.BugTemplate(self._bug_template) 1765 try: 1766 test_data = self._suite._jobs_to_tests[result.id] 1767 return template.finalize_bug_template( 1768 test_data.bug_template) 1769 except AttributeError: 1770 # Test control file does not have bug template defined. 1771 return template.bug_template 1772 except reporting_utils.InvalidBugTemplateException as e: 1773 logging.error('Merging bug templates failed with ' 1774 'error: %s An empty bug template will ' 1775 'be used.', e) 1776 return {} 1777 1778 def report(self, result): 1779 # reporting modules have dependency on external 1780 # packages, e.g., httplib2 Such dependency can cause 1781 # issue to any module tries to import suite.py without 1782 # building site-packages first. Since the reporting 1783 # modules are only used in this function, move the 1784 # imports here avoid the requirement of building site 1785 # packages to use other functions in this module. 1786 from autotest_lib.server.cros.dynamic_suite import reporting 1787 1788 reporting.send_email( 1789 self._get_test_bug(result), 1790 self._get_bug_template(result)) 1791