1# Lint as: python2, python3 2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6from __future__ import absolute_import 7from __future__ import division 8from __future__ import print_function 9 10import abc 11import datetime 12import difflib 13import functools 14import hashlib 15import logging 16import operator 17import os 18import re 19import six 20import sys 21import warnings 22 23import common 24 25from autotest_lib.frontend.afe.json_rpc import proxy 26from autotest_lib.client.common_lib import autotest_enum 27from autotest_lib.client.common_lib import error 28from autotest_lib.client.common_lib import global_config 29from autotest_lib.client.common_lib import priorities 30from autotest_lib.client.common_lib import time_utils 31from autotest_lib.client.common_lib import utils 32from autotest_lib.frontend.afe import model_attributes 33from autotest_lib.frontend.afe.json_rpc import proxy 34from autotest_lib.server.cros import provision 35from autotest_lib.server.cros.dynamic_suite import constants 36from autotest_lib.server.cros.dynamic_suite import control_file_getter 37from autotest_lib.server.cros.dynamic_suite import frontend_wrappers 38from autotest_lib.server.cros.dynamic_suite import job_status 39from autotest_lib.server.cros.dynamic_suite import suite_common 40from autotest_lib.server.cros.dynamic_suite import tools 41from autotest_lib.server.cros.dynamic_suite.job_status import Status 42 43try: 44 from autotest_lib.server.cros.dynamic_suite import boolparse_lib 45except ImportError as e: 46 print('Unable to import boolparse_lib: %s' % (e,)) 47 print('This script must be either:') 48 print(' - Be run in the chroot.') 49 print(' - (not yet supported) be run after running ') 50 print(' ../utils/build_externals.py') 51 52_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta', 53 'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable', 54 'sanity', 'push_to_prod'] 55_AUTOTEST_DIR = global_config.global_config.get_config_value( 56 'SCHEDULER', 'drone_installation_directory') 57 58 59class RetryHandler(object): 60 """Maintain retry information. 61 62 @var _retry_map: A dictionary that stores retry history. 63 The key is afe job id. The value is a dictionary. 64 {job_id: {'state':RetryHandler.States, 'retry_max':int}} 65 - state: 66 The retry state of a job. 67 NOT_ATTEMPTED: 68 We haven't done anything about the job. 69 ATTEMPTED: 70 We've made an attempt to schedule a retry job. The 71 scheduling may or may not be successful, e.g. 72 it might encounter an rpc error. Note failure 73 in scheduling a retry is different from a retry job failure. 74 For each job, we only attempt to schedule a retry once. 75 For example, assume we have a test with JOB_RETRIES=5 and 76 its second retry job failed. When we attempt to create 77 a third retry job to retry the second, we hit an rpc 78 error. In such case, we will give up on all following 79 retries. 80 RETRIED: 81 A retry job has already been successfully 82 scheduled. 83 - retry_max: 84 The maximum of times the job can still 85 be retried, taking into account retries 86 that have occurred. 87 @var _retry_level: A retry might be triggered only if the result 88 is worse than the level. 89 @var _max_retries: Maximum retry limit at suite level. 90 Regardless how many times each individual test 91 has been retried, the total number of retries happening in 92 the suite can't exceed _max_retries. 93 """ 94 95 States = autotest_enum.AutotestEnum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED', 96 start_value=1, step=1) 97 98 def __init__(self, initial_jobs_to_tests, retry_level='WARN', 99 max_retries=None): 100 """Initialize RetryHandler. 101 102 @param initial_jobs_to_tests: A dictionary that maps a job id to 103 a ControlData object. This dictionary should contain 104 jobs that are originally scheduled by the suite. 105 @param retry_level: A retry might be triggered only if the result is 106 worse than the level. 107 @param max_retries: Integer, maxmium total retries allowed 108 for the suite. Default to None, no max. 109 """ 110 self._retry_map = {} 111 self._retry_level = retry_level 112 self._max_retries = (max_retries 113 if max_retries is not None else sys.maxsize) 114 for job_id, test in initial_jobs_to_tests.items(): 115 if test.job_retries > 0: 116 self._add_job(new_job_id=job_id, 117 retry_max=test.job_retries) 118 else: 119 logging.debug("Test %s has no retries", test.name) 120 121 122 def _add_job(self, new_job_id, retry_max): 123 """Add a newly-created job to the retry map. 124 125 @param new_job_id: The afe_job_id of a newly created job. 126 @param retry_max: The maximum of times that we could retry 127 the test if the job fails. 128 129 @raises ValueError if new_job_id is already in retry map. 130 131 """ 132 if new_job_id in self._retry_map: 133 raise ValueError('add_job called when job is already in retry map.') 134 135 self._retry_map[new_job_id] = { 136 'state': self.States.NOT_ATTEMPTED, 137 'retry_max': retry_max} 138 139 140 def _suite_max_reached(self): 141 """Return whether maximum retry limit for a suite has been reached.""" 142 return self._max_retries <= 0 143 144 145 def add_retry(self, old_job_id, new_job_id): 146 """Record a retry. 147 148 Update retry map with the retry information. 149 150 @param old_job_id: The afe_job_id of the job that is retried. 151 @param new_job_id: The afe_job_id of the retry job. 152 153 @raises KeyError if old_job_id isn't in the retry map. 154 @raises ValueError if we have already retried or made an attempt 155 to retry the old job. 156 157 """ 158 old_record = self._retry_map[old_job_id] 159 if old_record['state'] != self.States.NOT_ATTEMPTED: 160 raise ValueError( 161 'We have already retried or attempted to retry job %d' % 162 old_job_id) 163 old_record['state'] = self.States.RETRIED 164 self._add_job(new_job_id=new_job_id, 165 retry_max=old_record['retry_max'] - 1) 166 self._max_retries -= 1 167 168 169 def set_attempted(self, job_id): 170 """Set the state of the job to ATTEMPTED. 171 172 @param job_id: afe_job_id of a job. 173 174 @raises KeyError if job_id isn't in the retry map. 175 @raises ValueError if the current state is not NOT_ATTEMPTED. 176 177 """ 178 current_state = self._retry_map[job_id]['state'] 179 if current_state != self.States.NOT_ATTEMPTED: 180 # We are supposed to retry or attempt to retry each job 181 # only once. Raise an error if this is not the case. 182 raise ValueError('Unexpected state transition: %s -> %s' % 183 (self.States.get_string(current_state), 184 self.States.get_string(self.States.ATTEMPTED))) 185 else: 186 self._retry_map[job_id]['state'] = self.States.ATTEMPTED 187 188 189 def has_following_retry(self, result): 190 """Check whether there will be a following retry. 191 192 We have the following cases for a given job id (result.id), 193 - no retry map entry -> retry not required, no following retry 194 - has retry map entry: 195 - already retried -> has following retry 196 - has not retried 197 (this branch can be handled by checking should_retry(result)) 198 - retry_max == 0 --> the last retry job, no more retry 199 - retry_max > 0 200 - attempted, but has failed in scheduling a 201 following retry due to rpc error --> no more retry 202 - has not attempped --> has following retry if test failed. 203 204 @param result: A result, encapsulating the status of the job. 205 206 @returns: True, if there will be a following retry. 207 False otherwise. 208 209 """ 210 return (result.test_executed 211 and result.id in self._retry_map 212 and (self._retry_map[result.id]['state'] == self.States.RETRIED 213 or self._should_retry(result))) 214 215 216 def _should_retry(self, result): 217 """Check whether we should retry a job based on its result. 218 219 We will retry the job that corresponds to the result 220 when all of the following are true. 221 a) The test was actually executed, meaning that if 222 a job was aborted before it could ever reach the state 223 of 'Running', the job will not be retried. 224 b) The result is worse than |self._retry_level| which 225 defaults to 'WARN'. 226 c) The test requires retry, i.e. the job has an entry in the retry map. 227 d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED 228 Note that if a test has JOB_RETRIES=5, and the second time 229 it was retried it hit an rpc error, we will give up on 230 all following retries. 231 e) The job has not reached its retry max, i.e. retry_max > 0 232 233 @param result: A result, encapsulating the status of the job. 234 235 @returns: True if we should retry the job. 236 237 """ 238 return ( 239 result.test_executed 240 and result.id in self._retry_map 241 and not self._suite_max_reached() 242 and result.is_worse_than( 243 job_status.Status(self._retry_level, '', 'reason')) 244 and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED 245 and self._retry_map[result.id]['retry_max'] > 0 246 ) 247 248 def _should_retry_local_job(self, job_id): 249 """Check whether we should retry a job based on information available 250 for a local job without a Result object. 251 252 We will retry the job that corresponds to the result 253 when all of the following are true. 254 a) The test requires retry, i.e. the job has an entry in the retry map. 255 b) We haven't made any retry attempt yet for this job, i.e. 256 state == NOT_ATTEMPTED 257 If the job is aborted, we will give up on all following retries, 258 regardless of max_retries. 259 c) The job has not reached its retry max, i.e. retry_max > 0 260 261 @param job_id: the id for the job, to look up relevant information. 262 263 @returns: True if we should retry the job. 264 265 """ 266 if self._suite_max_reached(): 267 logging.debug('suite max_retries reached, not retrying.') 268 return False 269 if job_id not in self._retry_map: 270 logging.debug('job_id not in retry map, not retrying.') 271 return False 272 if self._retry_map[job_id]['state'] != self.States.NOT_ATTEMPTED: 273 logging.debug("job state was %s not 'Not Attempted', not retrying", 274 self._retry_map[job_id]['state']) 275 return False 276 if self._retry_map[job_id]['retry_max'] <= 0: 277 logging.debug('test-level retries exhausted, not retrying') 278 return False 279 return True 280 281 282 def job_present(self, job_id): 283 """Check whether a job id present in the retry map. 284 285 @param job_id: afe_job_id of a job. 286 287 @returns: A True if the job is present, False if not. 288 """ 289 return bool(self._retry_map.get(job_id)) 290 291 292 293 def get_retry_max(self, job_id): 294 """Get the maximum times the job can still be retried. 295 296 @param job_id: afe_job_id of a job. 297 298 @returns: An int, representing the maximum times the job can still be 299 retried. 300 @raises KeyError if job_id isn't in the retry map. 301 302 """ 303 return self._retry_map[job_id]['retry_max'] 304 305 306class _SuiteChildJobCreator(object): 307 """Create test jobs for a suite.""" 308 309 def __init__( 310 self, 311 tag, 312 builds, 313 board, 314 afe=None, 315 max_runtime_mins=24*60, 316 timeout_mins=24*60, 317 suite_job_id=None, 318 ignore_deps=False, 319 extra_deps=(), 320 priority=priorities.Priority.DEFAULT, 321 offload_failures_only=False, 322 test_source_build=None, 323 job_keyvals=None, 324 ): 325 """ 326 Constructor 327 328 @param tag: a string with which to tag jobs run in this suite. 329 @param builds: the builds on which we're running this suite. 330 @param board: the board on which we're running this suite. 331 @param afe: an instance of AFE as defined in server/frontend.py. 332 @param max_runtime_mins: Maximum suite runtime, in minutes. 333 @param timeout_mins: Maximum job lifetime, in minutes. 334 @param suite_job_id: Job id that will act as parent id to all sub jobs. 335 Default: None 336 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 337 attribute and skip applying of dependency labels. 338 (Default:False) 339 @param extra_deps: A list of strings which are the extra DEPENDENCIES 340 to add to each test being scheduled. 341 @param priority: Integer priority level. Higher is more important. 342 @param offload_failures_only: Only enable gs_offloading for failed 343 jobs. 344 @param test_source_build: Build that contains the server-side test code. 345 @param job_keyvals: General job keyvals to be inserted into keyval file, 346 which will be used by tko/parse later. 347 """ 348 self._tag = tag 349 self._builds = builds 350 self._board = board 351 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30, 352 delay_sec=10, 353 debug=False) 354 self._max_runtime_mins = max_runtime_mins 355 self._timeout_mins = timeout_mins 356 self._suite_job_id = suite_job_id 357 self._ignore_deps = ignore_deps 358 self._extra_deps = tuple(extra_deps) 359 self._priority = priority 360 self._offload_failures_only = offload_failures_only 361 self._test_source_build = test_source_build 362 self._job_keyvals = job_keyvals 363 364 365 @property 366 def cros_build(self): 367 """Return the CrOS build or the first build in the builds dict.""" 368 # TODO(ayatane): Note that the builds dict isn't ordered. I'm not 369 # sure what the implications of this are, but it's probably not a 370 # good thing. 371 return self._builds.get(provision.CROS_VERSION_PREFIX, 372 list(self._builds.values())[0]) 373 374 375 def create_job(self, test, retry_for=None): 376 """ 377 Thin wrapper around frontend.AFE.create_job(). 378 379 @param test: ControlData object for a test to run. 380 @param retry_for: If the to-be-created job is a retry for an 381 old job, the afe_job_id of the old job will 382 be passed in as |retry_for|, which will be 383 recorded in the new job's keyvals. 384 @returns: A frontend.Job object with an added test_name member. 385 test_name is used to preserve the higher level TEST_NAME 386 name of the job. 387 """ 388 # For a system running multiple suites which share tests, the priority 389 # overridden may lead to unexpected scheduling order that adds extra 390 # provision jobs. 391 test_priority = self._priority 392 if utils.is_moblab(): 393 test_priority = max(self._priority, test.priority) 394 395 reboot_before = (model_attributes.RebootBefore.NEVER if test.fast 396 else None) 397 398 test_obj = self._afe.create_job( 399 control_file=test.text, 400 name=tools.create_job_name( 401 self._test_source_build or self.cros_build, 402 self._tag, 403 test.name), 404 control_type=test.test_type.capitalize(), 405 meta_hosts=[self._board]*test.sync_count, 406 dependencies=self._create_job_deps(test), 407 keyvals=self._create_keyvals_for_test_job(test, retry_for), 408 max_runtime_mins=self._max_runtime_mins, 409 timeout_mins=self._timeout_mins, 410 parent_job_id=self._suite_job_id, 411 reboot_before=reboot_before, 412 run_reset=not test.fast, 413 priority=test_priority, 414 synch_count=test.sync_count, 415 require_ssp=test.require_ssp) 416 417 test_obj.test_name = test.name 418 return test_obj 419 420 421 def _create_job_deps(self, test): 422 """Create job deps list for a test job. 423 424 @returns: A list of dependency strings. 425 """ 426 if self._ignore_deps: 427 job_deps = [] 428 else: 429 job_deps = list(test.dependencies) 430 job_deps.extend(self._extra_deps) 431 return job_deps 432 433 434 def _create_keyvals_for_test_job(self, test, retry_for=None): 435 """Create keyvals dict for creating a test job. 436 437 @param test: ControlData object for a test to run. 438 @param retry_for: If the to-be-created job is a retry for an 439 old job, the afe_job_id of the old job will 440 be passed in as |retry_for|, which will be 441 recorded in the new job's keyvals. 442 @returns: A keyvals dict for creating the test job. 443 """ 444 keyvals = { 445 constants.JOB_BUILD_KEY: self.cros_build, 446 constants.JOB_SUITE_KEY: self._tag, 447 constants.JOB_EXPERIMENTAL_KEY: test.experimental, 448 constants.JOB_BUILDS_KEY: self._builds 449 } 450 # test_source_build is saved to job_keyvals so scheduler can retrieve 451 # the build name from database when compiling autoserv commandline. 452 # This avoid a database change to add a new field in afe_jobs. 453 # 454 # Only add `test_source_build` to job keyvals if the build is different 455 # from the CrOS build or the job uses more than one build, e.g., both 456 # firmware and CrOS will be updated in the dut. 457 # This is for backwards compatibility, so the update Autotest code can 458 # compile an autoserv command line to run in a SSP container using 459 # previous builds. 460 if (self._test_source_build and 461 (self.cros_build != self._test_source_build or 462 len(self._builds) > 1)): 463 keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = \ 464 self._test_source_build 465 for prefix, build in six.iteritems(self._builds): 466 if prefix == provision.FW_RW_VERSION_PREFIX: 467 keyvals[constants.FWRW_BUILD]= build 468 elif prefix == provision.FW_RO_VERSION_PREFIX: 469 keyvals[constants.FWRO_BUILD] = build 470 # Add suite job id to keyvals so tko parser can read it from keyval 471 # file. 472 if self._suite_job_id: 473 keyvals[constants.PARENT_JOB_ID] = self._suite_job_id 474 # We drop the old job's id in the new job's keyval file so that 475 # later our tko parser can figure out the retry relationship and 476 # invalidate the results of the old job in tko database. 477 if retry_for: 478 keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for 479 if self._offload_failures_only: 480 keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True 481 if self._job_keyvals: 482 for key in constants.INHERITED_KEYVALS: 483 if key in self._job_keyvals: 484 keyvals[key] = self._job_keyvals[key] 485 return keyvals 486 487 488class _ControlFileRetriever(object): 489 """Retrieves control files. 490 491 This returns control data instances, unlike control file getters 492 which simply return the control file text contents. 493 """ 494 495 def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False, 496 test_args=None): 497 """Initialize instance. 498 499 @param cf_getter: a control_file_getter.ControlFileGetter used to list 500 and fetch the content of control files 501 @param forgiving_parser: If False, will raise ControlVariableExceptions 502 if any are encountered when parsing control 503 files. Note that this can raise an exception 504 for syntax errors in unrelated files, because 505 we parse them before applying the predicate. 506 @param run_prod_code: If true, the retrieved tests will run the test 507 code that lives in prod aka the test code 508 currently on the lab servers by disabling 509 SSP for the discovered tests. 510 @param test_args: A dict of args to be seeded in test control file under 511 the name |args_dict|. 512 """ 513 self._cf_getter = cf_getter 514 self._forgiving_parser = forgiving_parser 515 self._run_prod_code = run_prod_code 516 self._test_args = test_args 517 518 519 def retrieve_for_test(self, test_name): 520 """Retrieve a test's control data. 521 522 This ignores forgiving_parser because we cannot return a 523 forgiving value. 524 525 @param test_name: Name of test to retrieve. 526 527 @raises ControlVariableException: There is a syntax error in a 528 control file. 529 530 @returns a ControlData object 531 """ 532 return suite_common.retrieve_control_data_for_test( 533 self._cf_getter, test_name) 534 535 536 def retrieve_for_suite(self, suite_name=''): 537 """Scan through all tests and find all tests. 538 539 @param suite_name: If specified, this method will attempt to restrain 540 the search space to just this suite's control files. 541 542 @raises ControlVariableException: If forgiving_parser is False and there 543 is a syntax error in a control file. 544 545 @returns a dictionary of ControlData objects that based on given 546 parameters. 547 """ 548 tests = suite_common.retrieve_for_suite( 549 self._cf_getter, suite_name, self._forgiving_parser, 550 self._test_args) 551 if self._run_prod_code: 552 for test in six.itervalues(tests): 553 test.require_ssp = False 554 555 return tests 556 557 558def list_all_suites(build, devserver, cf_getter=None): 559 """ 560 Parses all ControlData objects with a SUITE tag and extracts all 561 defined suite names. 562 563 @param build: the build on which we're running this suite. 564 @param devserver: the devserver which contains the build. 565 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 566 using DevServerGetter. 567 568 @return list of suites 569 """ 570 if cf_getter is None: 571 cf_getter = _create_ds_getter(build, devserver) 572 573 suites = set() 574 predicate = lambda t: True 575 for test in find_and_parse_tests(cf_getter, predicate): 576 suites.update(test.suite_tag_parts) 577 return list(suites) 578 579 580def test_file_similarity_predicate(test_file_pattern): 581 """Returns predicate that gets the similarity based on a test's file 582 name pattern. 583 584 Builds a predicate that takes in a parsed control file (a ControlData) 585 and returns a tuple of (file path, ratio), where ratio is the 586 similarity between the test file name and the given test_file_pattern. 587 588 @param test_file_pattern: regular expression (string) to match against 589 control file names. 590 @return a callable that takes a ControlData and and returns a tuple of 591 (file path, ratio), where ratio is the similarity between the 592 test file name and the given test_file_pattern. 593 """ 594 return lambda t: ((None, 0) if not hasattr(t, 'path') else 595 (t.path, difflib.SequenceMatcher(a=t.path, 596 b=test_file_pattern).ratio())) 597 598 599def test_name_similarity_predicate(test_name): 600 """Returns predicate that matched based on a test's name. 601 602 Builds a predicate that takes in a parsed control file (a ControlData) 603 and returns a tuple of (test name, ratio), where ratio is the similarity 604 between the test name and the given test_name. 605 606 @param test_name: the test name to base the predicate on. 607 @return a callable that takes a ControlData and returns a tuple of 608 (test name, ratio), where ratio is the similarity between the 609 test name and the given test_name. 610 """ 611 return lambda t: ((None, 0) if not hasattr(t, 'name') else 612 (t.name, 613 difflib.SequenceMatcher(a=t.name, b=test_name).ratio())) 614 615 616def matches_attribute_expression_predicate(test_attr_boolstr): 617 """Returns predicate that matches based on boolean expression of 618 attributes. 619 620 Builds a predicate that takes in a parsed control file (a ControlData) 621 ans returns True if the test attributes satisfy the given attribute 622 boolean expression. 623 624 @param test_attr_boolstr: boolean expression of the attributes to be 625 test, like 'system:all and interval:daily'. 626 627 @return a callable that takes a ControlData and returns True if the test 628 attributes satisfy the given boolean expression. 629 """ 630 return lambda t: boolparse_lib.BoolstrResult( 631 test_attr_boolstr, t.attributes) 632 633 634def test_file_matches_pattern_predicate(test_file_pattern): 635 """Returns predicate that matches based on a test's file name pattern. 636 637 Builds a predicate that takes in a parsed control file (a ControlData) 638 and returns True if the test's control file name matches the given 639 regular expression. 640 641 @param test_file_pattern: regular expression (string) to match against 642 control file names. 643 @return a callable that takes a ControlData and and returns 644 True if control file name matches the pattern. 645 """ 646 return lambda t: hasattr(t, 'path') and re.match(test_file_pattern, 647 t.path) 648 649 650def test_name_matches_pattern_predicate(test_name_pattern): 651 """Returns predicate that matches based on a test's name pattern. 652 653 Builds a predicate that takes in a parsed control file (a ControlData) 654 and returns True if the test name matches the given regular expression. 655 656 @param test_name_pattern: regular expression (string) to match against 657 test names. 658 @return a callable that takes a ControlData and returns 659 True if the name fields matches the pattern. 660 """ 661 return lambda t: hasattr(t, 'name') and re.match(test_name_pattern, 662 t.name) 663 664 665def test_name_equals_predicate(test_name): 666 """Returns predicate that matched based on a test's name. 667 668 Builds a predicate that takes in a parsed control file (a ControlData) 669 and returns True if the test name is equal to |test_name|. 670 671 @param test_name: the test name to base the predicate on. 672 @return a callable that takes a ControlData and looks for |test_name| 673 in that ControlData's name. 674 """ 675 return lambda t: hasattr(t, 'name') and test_name == t.name 676 677 678def name_in_tag_similarity_predicate(name): 679 """Returns predicate that takes a control file and gets the similarity 680 of the suites in the control file and the given name. 681 682 Builds a predicate that takes in a parsed control file (a ControlData) 683 and returns a list of tuples of (suite name, ratio), where suite name 684 is each suite listed in the control file, and ratio is the similarity 685 between each suite and the given name. 686 687 @param name: the suite name to base the predicate on. 688 @return a callable that takes a ControlData and returns a list of tuples 689 of (suite name, ratio), where suite name is each suite listed in 690 the control file, and ratio is the similarity between each suite 691 and the given name. 692 """ 693 return lambda t: [(suite, 694 difflib.SequenceMatcher(a=suite, b=name).ratio()) 695 for suite in t.suite_tag_parts] or [(None, 0)] 696 697 698def name_in_tag_predicate(name): 699 """Returns predicate that takes a control file and looks for |name|. 700 701 Builds a predicate that takes in a parsed control file (a ControlData) 702 and returns True if the SUITE tag is present and contains |name|. 703 704 @param name: the suite name to base the predicate on. 705 @return a callable that takes a ControlData and looks for |name| in that 706 ControlData object's suite member. 707 """ 708 return suite_common.name_in_tag_predicate(name) 709 710 711def create_fs_getter(autotest_dir): 712 """ 713 @param autotest_dir: the place to find autotests. 714 @return a FileSystemGetter instance that looks under |autotest_dir|. 715 """ 716 # currently hard-coded places to look for tests. 717 subpaths = ['server/site_tests', 'client/site_tests', 718 'server/tests', 'client/tests'] 719 directories = [os.path.join(autotest_dir, p) for p in subpaths] 720 return control_file_getter.FileSystemGetter(directories) 721 722 723def _create_ds_getter(build, devserver): 724 """ 725 @param build: the build on which we're running this suite. 726 @param devserver: the devserver which contains the build. 727 @return a FileSystemGetter instance that looks under |autotest_dir|. 728 """ 729 return control_file_getter.DevServerGetter(build, devserver) 730 731 732def _non_experimental_tests_predicate(test_data): 733 """Test predicate for non-experimental tests.""" 734 return not test_data.experimental 735 736 737def find_and_parse_tests(cf_getter, predicate, suite_name='', 738 add_experimental=False, forgiving_parser=True, 739 run_prod_code=False, test_args=None): 740 """ 741 Function to scan through all tests and find eligible tests. 742 743 Search through all tests based on given cf_getter, suite_name, 744 add_experimental and forgiving_parser, return the tests that match 745 given predicate. 746 747 @param cf_getter: a control_file_getter.ControlFileGetter used to list 748 and fetch the content of control files 749 @param predicate: a function that should return True when run over a 750 ControlData representation of a control file that should be in 751 this Suite. 752 @param suite_name: If specified, this method will attempt to restrain 753 the search space to just this suite's control files. 754 @param add_experimental: add tests with experimental attribute set. 755 @param forgiving_parser: If False, will raise ControlVariableExceptions 756 if any are encountered when parsing control 757 files. Note that this can raise an exception 758 for syntax errors in unrelated files, because 759 we parse them before applying the predicate. 760 @param run_prod_code: If true, the suite will run the test code that 761 lives in prod aka the test code currently on the 762 lab servers by disabling SSP for the discovered 763 tests. 764 @param test_args: A dict of args to be seeded in test control file. 765 766 @raises ControlVariableException: If forgiving_parser is False and there 767 is a syntax error in a control file. 768 769 @return list of ControlData objects that should be run, with control 770 file text added in |text| attribute. Results are sorted based 771 on the TIME setting in control file, slowest test comes first. 772 """ 773 logging.debug('Getting control file list for suite: %s', suite_name) 774 retriever = _ControlFileRetriever(cf_getter, 775 forgiving_parser=forgiving_parser, 776 run_prod_code=run_prod_code, 777 test_args=test_args) 778 tests = retriever.retrieve_for_suite(suite_name) 779 if not add_experimental: 780 predicate = _ComposedPredicate([predicate, 781 _non_experimental_tests_predicate]) 782 return suite_common.filter_tests(tests, predicate) 783 784 785def find_possible_tests(cf_getter, predicate, suite_name='', count=10): 786 """ 787 Function to scan through all tests and find possible tests. 788 789 Search through all tests based on given cf_getter, suite_name, 790 add_experimental and forgiving_parser. Use the given predicate to 791 calculate the similarity and return the top 10 matches. 792 793 @param cf_getter: a control_file_getter.ControlFileGetter used to list 794 and fetch the content of control files 795 @param predicate: a function that should return a tuple of (name, ratio) 796 when run over a ControlData representation of a control file that 797 should be in this Suite. `name` is the key to be compared, e.g., 798 a suite name or test name. `ratio` is a value between [0,1] 799 indicating the similarity of `name` and the value to be compared. 800 @param suite_name: If specified, this method will attempt to restrain 801 the search space to just this suite's control files. 802 @param count: Number of suggestions to return, default to 10. 803 804 @return list of top names that similar to the given test, sorted by 805 match ratio. 806 """ 807 logging.debug('Getting control file list for suite: %s', suite_name) 808 tests = _ControlFileRetriever(cf_getter).retrieve_for_suite(suite_name) 809 logging.debug('Parsed %s control files.', len(tests)) 810 similarities = {} 811 for test in six.itervalues(tests): 812 ratios = predicate(test) 813 # Some predicates may return a list of tuples, e.g., 814 # name_in_tag_similarity_predicate. Convert all returns to a list. 815 if not isinstance(ratios, list): 816 ratios = [ratios] 817 for name, ratio in ratios: 818 similarities[name] = ratio 819 return [s[0] for s in 820 sorted(list(similarities.items()), key=operator.itemgetter(1), 821 reverse=True)][:count] 822 823 824def _deprecated_suite_method(func): 825 """Decorator for deprecated Suite static methods. 826 827 TODO(ayatane): This is used to decorate functions that are called as 828 static methods on Suite. 829 """ 830 @functools.wraps(func) 831 def wrapper(*args, **kwargs): 832 """Wraps |func| for warning.""" 833 warnings.warn('Calling method "%s" from Suite is deprecated' % 834 func.__name__) 835 return func(*args, **kwargs) 836 return staticmethod(wrapper) 837 838 839class _BaseSuite(object): 840 """ 841 A suite of tests, defined by some predicate over control file variables. 842 843 Given a place to search for control files a predicate to match the desired 844 tests, can gather tests and fire off jobs to run them, and then wait for 845 results. 846 847 @var _predicate: a function that should return True when run over a 848 ControlData representation of a control file that should be in 849 this Suite. 850 @var _tag: a string with which to tag jobs run in this suite. 851 @var _builds: the builds on which we're running this suite. 852 @var _afe: an instance of AFE as defined in server/frontend.py. 853 @var _tko: an instance of TKO as defined in server/frontend.py. 854 @var _jobs: currently scheduled jobs, if any. 855 @var _jobs_to_tests: a dictionary that maps job ids to tests represented 856 ControlData objects. 857 @var _retry: a bool value indicating whether jobs should be retried on 858 failure. 859 @var _retry_handler: a RetryHandler object. 860 861 """ 862 863 864 def __init__( 865 self, 866 tests, 867 tag, 868 builds, 869 board, 870 afe=None, 871 tko=None, 872 pool=None, 873 results_dir=None, 874 max_runtime_mins=24*60, 875 timeout_mins=24*60, 876 file_bugs=False, 877 suite_job_id=None, 878 ignore_deps=False, 879 extra_deps=None, 880 priority=priorities.Priority.DEFAULT, 881 wait_for_results=True, 882 job_retry=False, 883 max_retries=sys.maxsize, 884 offload_failures_only=False, 885 test_source_build=None, 886 job_keyvals=None, 887 child_dependencies=(), 888 result_reporter=None, 889 ): 890 """Initialize instance. 891 892 @param tests: Iterable of tests to run. 893 @param tag: a string with which to tag jobs run in this suite. 894 @param builds: the builds on which we're running this suite. 895 @param board: the board on which we're running this suite. 896 @param afe: an instance of AFE as defined in server/frontend.py. 897 @param tko: an instance of TKO as defined in server/frontend.py. 898 @param pool: Specify the pool of machines to use for scheduling 899 purposes. 900 @param results_dir: The directory where the job can write results to. 901 This must be set if you want job_id of sub-jobs 902 list in the job keyvals. 903 @param max_runtime_mins: Maximum suite runtime, in minutes. 904 @param timeout: Maximum job lifetime, in hours. 905 @param suite_job_id: Job id that will act as parent id to all sub jobs. 906 Default: None 907 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 908 attribute and skip applying of dependency labels. 909 (Default:False) 910 @param extra_deps: A list of strings which are the extra DEPENDENCIES 911 to add to each test being scheduled. 912 @param priority: Integer priority level. Higher is more important. 913 @param wait_for_results: Set to False to run the suite job without 914 waiting for test jobs to finish. Default is 915 True. 916 @param job_retry: A bool value indicating whether jobs should be retried 917 on failure. If True, the field 'JOB_RETRIES' in 918 control files will be respected. If False, do not 919 retry. 920 @param max_retries: Maximum retry limit at suite level. 921 Regardless how many times each individual test 922 has been retried, the total number of retries 923 happening in the suite can't exceed _max_retries. 924 Default to sys.maxint. 925 @param offload_failures_only: Only enable gs_offloading for failed 926 jobs. 927 @param test_source_build: Build that contains the server-side test code. 928 @param job_keyvals: General job keyvals to be inserted into keyval file, 929 which will be used by tko/parse later. 930 @param child_dependencies: (optional) list of dependency strings 931 to be added as dependencies to child jobs. 932 @param result_reporter: A _ResultReporter instance to report results. If 933 None, an _EmailReporter will be created. 934 """ 935 936 self.tests = list(tests) 937 self._tag = tag 938 self._builds = builds 939 self._results_dir = results_dir 940 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30, 941 delay_sec=10, 942 debug=False) 943 self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30, 944 delay_sec=10, 945 debug=False) 946 self._jobs = [] 947 self._jobs_to_tests = {} 948 949 self._file_bugs = file_bugs 950 self._suite_job_id = suite_job_id 951 self._job_retry=job_retry 952 self._max_retries = max_retries 953 # RetryHandler to be initialized in schedule() 954 self._retry_handler = None 955 self.wait_for_results = wait_for_results 956 self._job_keyvals = job_keyvals 957 if result_reporter is None: 958 self._result_reporter = _EmailReporter(self) 959 else: 960 self._result_reporter = result_reporter 961 962 if extra_deps is None: 963 extra_deps = [] 964 extra_deps.append(board) 965 if pool: 966 extra_deps.append(pool) 967 extra_deps.extend(child_dependencies) 968 self._dependencies = tuple(extra_deps) 969 970 self._job_creator = _SuiteChildJobCreator( 971 tag=tag, 972 builds=builds, 973 board=board, 974 afe=afe, 975 max_runtime_mins=max_runtime_mins, 976 timeout_mins=timeout_mins, 977 suite_job_id=suite_job_id, 978 ignore_deps=ignore_deps, 979 extra_deps=extra_deps, 980 priority=priority, 981 offload_failures_only=offload_failures_only, 982 test_source_build=test_source_build, 983 job_keyvals=job_keyvals, 984 ) 985 986 987 def _schedule_test(self, record, test, retry_for=None): 988 """Schedule a single test and return the job. 989 990 Schedule a single test by creating a job, and then update relevant 991 data structures that are used to keep track of all running jobs. 992 993 Emits a TEST_NA status log entry if it failed to schedule the test due 994 to NoEligibleHostException or a non-existent board label. 995 996 Returns a frontend.Job object if the test is successfully scheduled. 997 If scheduling failed due to NoEligibleHostException or a non-existent 998 board label, returns None. 999 1000 @param record: A callable to use for logging. 1001 prototype: record(base_job.status_log_entry) 1002 @param test: ControlData for a test to run. 1003 @param retry_for: If we are scheduling a test to retry an 1004 old job, the afe_job_id of the old job 1005 will be passed in as |retry_for|. 1006 1007 @returns: A frontend.Job object or None 1008 """ 1009 msg = 'Scheduling %s' % test.name 1010 if retry_for: 1011 msg = msg + ', to retry afe job %d' % retry_for 1012 logging.debug(msg) 1013 begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT) 1014 try: 1015 job = self._job_creator.create_job(test, retry_for=retry_for) 1016 except (error.NoEligibleHostException, proxy.ValidationError) as e: 1017 if (isinstance(e, error.NoEligibleHostException) 1018 or (isinstance(e, proxy.ValidationError) 1019 and _is_nonexistent_board_error(e))): 1020 # Treat a dependency on a non-existent board label the same as 1021 # a dependency on a board that exists, but for which there's no 1022 # hardware. 1023 logging.debug('%s not applicable for this board/pool. ' 1024 'Emitting TEST_NA.', test.name) 1025 Status('TEST_NA', test.name, 1026 'Skipping: test not supported on this board/pool.', 1027 begin_time_str=begin_time_str).record_all(record) 1028 return None 1029 else: 1030 raise e 1031 except (error.RPCException, proxy.JSONRPCException): 1032 if retry_for: 1033 # Mark that we've attempted to retry the old job. 1034 logging.debug("RPC exception occurred") 1035 self._retry_handler.set_attempted(job_id=retry_for) 1036 raise 1037 else: 1038 self._jobs.append(job) 1039 self._jobs_to_tests[job.id] = test 1040 if retry_for: 1041 # A retry job was just created, record it. 1042 self._retry_handler.add_retry( 1043 old_job_id=retry_for, new_job_id=job.id) 1044 retry_count = (test.job_retries - 1045 self._retry_handler.get_retry_max(job.id)) 1046 logging.debug('Job %d created to retry job %d. ' 1047 'Have retried for %d time(s)', 1048 job.id, retry_for, retry_count) 1049 self._remember_job_keyval(job) 1050 return job 1051 1052 def schedule(self, record): 1053 """ 1054 Schedule jobs using |self._afe|. 1055 1056 frontend.Job objects representing each scheduled job will be put in 1057 |self._jobs|. 1058 1059 @param record: A callable to use for logging. 1060 prototype: record(base_job.status_log_entry) 1061 @returns: The number of tests that were scheduled. 1062 """ 1063 scheduled_test_names = [] 1064 logging.debug('Discovered %d tests.', len(self.tests)) 1065 1066 Status('INFO', 'Start %s' % self._tag).record_result(record) 1067 try: 1068 # Write job_keyvals into keyval file. 1069 if self._job_keyvals: 1070 utils.write_keyval(self._results_dir, self._job_keyvals) 1071 1072 # TODO(crbug.com/730885): This is a hack to protect tests that are 1073 # not usually retried from getting hit by a provision error when run 1074 # as part of a suite. Remove this hack once provision is separated 1075 # out in its own suite. 1076 self._bump_up_test_retries(self.tests) 1077 for test in self.tests: 1078 scheduled_job = self._schedule_test(record, test) 1079 if scheduled_job is not None: 1080 scheduled_test_names.append(test.name) 1081 1082 # Write the num of scheduled tests and name of them to keyval file. 1083 logging.debug('Scheduled %d tests, writing the total to keyval.', 1084 len(scheduled_test_names)) 1085 utils.write_keyval( 1086 self._results_dir, 1087 self._make_scheduled_tests_keyvals(scheduled_test_names)) 1088 except Exception: 1089 logging.exception('Exception while scheduling suite') 1090 Status('FAIL', self._tag, 1091 'Exception while scheduling suite').record_result(record) 1092 1093 if self._job_retry: 1094 logging.debug("Initializing RetryHandler for suite %s.", self._tag) 1095 self._retry_handler = RetryHandler( 1096 initial_jobs_to_tests=self._jobs_to_tests, 1097 max_retries=self._max_retries) 1098 logging.debug("retry map created: %s ", 1099 self._retry_handler._retry_map) 1100 else: 1101 logging.info("Will not retry jobs from suite %s.", self._tag) 1102 return len(scheduled_test_names) 1103 1104 1105 def _bump_up_test_retries(self, tests): 1106 """Bump up individual test retries to match suite retry options.""" 1107 if not self._job_retry: 1108 return 1109 1110 for test in tests: 1111 # We do honor if a test insists on JOB_RETRIES = 0. 1112 if test.job_retries is None: 1113 logging.debug( 1114 'Test %s did not request retries, but suite requires ' 1115 'retries. Bumping retries up to 1. ' 1116 '(See crbug.com/730885)', 1117 test.name) 1118 test.job_retries = 1 1119 1120 1121 def _make_scheduled_tests_keyvals(self, scheduled_test_names): 1122 """Make a keyvals dict to write for scheduled test names. 1123 1124 @param scheduled_test_names: A list of scheduled test name strings. 1125 1126 @returns: A keyvals dict. 1127 """ 1128 return { 1129 constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names), 1130 constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names), 1131 } 1132 1133 1134 def _should_report(self, result): 1135 """ 1136 Returns True if this failure requires to be reported. 1137 1138 @param result: A result, encapsulating the status of the failed job. 1139 @return: True if we should report this failure. 1140 """ 1141 return (self._file_bugs and result.test_executed and 1142 not result.is_testna() and 1143 result.is_worse_than(job_status.Status('GOOD', '', 'reason'))) 1144 1145 1146 def _has_retry(self, result): 1147 """ 1148 Return True if this result gets to retry. 1149 1150 @param result: A result, encapsulating the status of the failed job. 1151 @return: bool 1152 """ 1153 return (self._job_retry 1154 and self._retry_handler.has_following_retry(result)) 1155 1156 1157 def wait(self, record): 1158 """ 1159 Polls for the job statuses, using |record| to print status when each 1160 completes. 1161 1162 @param record: callable that records job status. 1163 prototype: 1164 record(base_job.status_log_entry) 1165 """ 1166 waiter = job_status.JobResultWaiter(self._afe, self._tko) 1167 try: 1168 if self._suite_job_id: 1169 jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id) 1170 else: 1171 logging.warning('Unknown suite_job_id, falling back to less ' 1172 'efficient results_generator.') 1173 jobs = self._jobs 1174 waiter.add_jobs(jobs) 1175 for result in waiter.wait_for_results(): 1176 self._handle_result(result=result, record=record, waiter=waiter) 1177 if self._finished_waiting(): 1178 break 1179 except Exception: # pylint: disable=W0703 1180 logging.exception('Exception waiting for results') 1181 Status('FAIL', self._tag, 1182 'Exception waiting for results').record_result(record) 1183 1184 1185 def _finished_waiting(self): 1186 """Return whether the suite is finished waiting for child jobs.""" 1187 return False 1188 1189 1190 def _handle_result(self, result, record, waiter): 1191 """ 1192 Handle a test job result. 1193 1194 @param result: Status instance for job. 1195 @param record: callable that records job status. 1196 prototype: 1197 record(base_job.status_log_entry) 1198 @param waiter: JobResultsWaiter instance. 1199 1200 @instance_param _result_reporter: _ResultReporter instance. 1201 """ 1202 self._record_result(result, record) 1203 rescheduled = False 1204 if self._job_retry and self._retry_handler._should_retry(result): 1205 rescheduled = self._retry_result(result, record, waiter) 1206 # TODO (crbug.com/751428): If the suite times out before a retry could 1207 # finish, we would lose the chance to report errors from the original 1208 # job. 1209 if self._has_retry(result) and rescheduled: 1210 return 1211 1212 if self._should_report(result): 1213 self._result_reporter.report(result) 1214 1215 def _record_result(self, result, record): 1216 """ 1217 Record a test job result. 1218 1219 @param result: Status instance for job. 1220 @param record: callable that records job status. 1221 prototype: 1222 record(base_job.status_log_entry) 1223 """ 1224 result.record_all(record) 1225 self._remember_job_keyval(result) 1226 1227 1228 def _retry_result(self, result, record, waiter): 1229 """ 1230 Retry a test job result. 1231 1232 @param result: Status instance for job. 1233 @param record: callable that records job status. 1234 prototype: 1235 record(base_job.status_log_entry) 1236 @param waiter: JobResultsWaiter instance. 1237 @returns: True if a job was scheduled for retry, False otherwise. 1238 """ 1239 test = self._jobs_to_tests[result.id] 1240 try: 1241 # It only takes effect for CQ retriable job: 1242 # 1) in first try, test.fast=True. 1243 # 2) in second try, test will be run in normal mode, so reset 1244 # test.fast=False. 1245 test.fast = False 1246 new_job = self._schedule_test( 1247 record=record, test=test, retry_for=result.id) 1248 except (error.RPCException, proxy.JSONRPCException) as e: 1249 logging.error('Failed to schedule test: %s, Reason: %s', 1250 test.name, e) 1251 return False 1252 else: 1253 waiter.add_job(new_job) 1254 return bool(new_job) 1255 1256 @property 1257 def jobs(self): 1258 """Give a copy of the associated jobs 1259 1260 @returns: array of jobs""" 1261 return [job for job in self._jobs] 1262 1263 1264 @property 1265 def _should_file_bugs(self): 1266 """Return whether bugs should be filed. 1267 1268 @returns: bool 1269 """ 1270 # File bug when failure is one of the _FILE_BUG_SUITES, 1271 # otherwise send an email to the owner anc cc. 1272 return self._tag in _FILE_BUG_SUITES 1273 1274 1275 def abort(self): 1276 """ 1277 Abort all scheduled test jobs. 1278 """ 1279 if self._jobs: 1280 job_ids = [job.id for job in self._jobs] 1281 self._afe.run('abort_host_queue_entries', job__id__in=job_ids) 1282 1283 1284 def _remember_job_keyval(self, job): 1285 """ 1286 Record provided job as a suite job keyval, for later referencing. 1287 1288 @param job: some representation of a job that has the attributes: 1289 id, test_name, and owner 1290 """ 1291 if self._results_dir and job.id and job.owner and job.test_name: 1292 job_id_owner = '%s-%s' % (job.id, job.owner) 1293 logging.debug('Adding job keyval for %s=%s', 1294 job.test_name, job_id_owner) 1295 utils.write_keyval( 1296 self._results_dir, 1297 {hashlib.md5(job.test_name).hexdigest(): job_id_owner}) 1298 1299 1300class Suite(_BaseSuite): 1301 """ 1302 A suite of tests, defined by some predicate over control file variables. 1303 1304 Given a place to search for control files a predicate to match the desired 1305 tests, can gather tests and fire off jobs to run them, and then wait for 1306 results. 1307 1308 @var _predicate: a function that should return True when run over a 1309 ControlData representation of a control file that should be in 1310 this Suite. 1311 @var _tag: a string with which to tag jobs run in this suite. 1312 @var _builds: the builds on which we're running this suite. 1313 @var _afe: an instance of AFE as defined in server/frontend.py. 1314 @var _tko: an instance of TKO as defined in server/frontend.py. 1315 @var _jobs: currently scheduled jobs, if any. 1316 @var _jobs_to_tests: a dictionary that maps job ids to tests represented 1317 ControlData objects. 1318 @var _cf_getter: a control_file_getter.ControlFileGetter 1319 @var _retry: a bool value indicating whether jobs should be retried on 1320 failure. 1321 @var _retry_handler: a RetryHandler object. 1322 1323 """ 1324 1325 # TODO(ayatane): These methods are kept on the Suite class for 1326 # backward compatibility. 1327 find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests) 1328 find_possible_tests = _deprecated_suite_method(find_possible_tests) 1329 create_fs_getter = _deprecated_suite_method(create_fs_getter) 1330 name_in_tag_predicate = _deprecated_suite_method( 1331 suite_common.name_in_tag_predicate) 1332 name_in_tag_similarity_predicate = _deprecated_suite_method( 1333 name_in_tag_similarity_predicate) 1334 test_name_equals_predicate = _deprecated_suite_method( 1335 test_name_equals_predicate) 1336 test_name_in_list_predicate = _deprecated_suite_method( 1337 suite_common.test_name_in_list_predicate) 1338 test_name_matches_pattern_predicate = _deprecated_suite_method( 1339 test_name_matches_pattern_predicate) 1340 test_file_matches_pattern_predicate = _deprecated_suite_method( 1341 test_file_matches_pattern_predicate) 1342 matches_attribute_expression_predicate = _deprecated_suite_method( 1343 matches_attribute_expression_predicate) 1344 test_name_similarity_predicate = _deprecated_suite_method( 1345 test_name_similarity_predicate) 1346 test_file_similarity_predicate = _deprecated_suite_method( 1347 test_file_similarity_predicate) 1348 list_all_suites = _deprecated_suite_method(list_all_suites) 1349 get_test_source_build = _deprecated_suite_method( 1350 suite_common.get_test_source_build) 1351 1352 1353 @classmethod 1354 def create_from_predicates(cls, predicates, builds, board, devserver, 1355 cf_getter=None, name='ad_hoc_suite', 1356 run_prod_code=False, **dargs): 1357 """ 1358 Create a Suite using a given predicate test filters. 1359 1360 Uses supplied predicate(s) to instantiate a Suite. Looks for tests in 1361 |autotest_dir| and will schedule them using |afe|. Pulls control files 1362 from the default dev server. Results will be pulled from |tko| upon 1363 completion. 1364 1365 @param predicates: A list of callables that accept ControlData 1366 representations of control files. A test will be 1367 included in suite if all callables in this list 1368 return True on the given control file. 1369 @param builds: the builds on which we're running this suite. It's a 1370 dictionary of version_prefix:build. 1371 @param board: the board on which we're running this suite. 1372 @param devserver: the devserver which contains the build. 1373 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 1374 using DevServerGetter. 1375 @param name: name of suite. Defaults to 'ad_hoc_suite' 1376 @param run_prod_code: If true, the suite will run the tests that 1377 lives in prod aka the test code currently on the 1378 lab servers. 1379 @param **dargs: Any other Suite constructor parameters, as described 1380 in Suite.__init__ docstring. 1381 @return a Suite instance. 1382 """ 1383 if cf_getter is None: 1384 if run_prod_code: 1385 cf_getter = create_fs_getter(_AUTOTEST_DIR) 1386 else: 1387 build = suite_common.get_test_source_build(builds, **dargs) 1388 cf_getter = _create_ds_getter(build, devserver) 1389 1390 return cls(predicates, 1391 name, builds, board, cf_getter, run_prod_code, **dargs) 1392 1393 1394 @classmethod 1395 def create_from_name(cls, name, builds, board, devserver, cf_getter=None, 1396 **dargs): 1397 """ 1398 Create a Suite using a predicate based on the SUITE control file var. 1399 1400 Makes a predicate based on |name| and uses it to instantiate a Suite 1401 that looks for tests in |autotest_dir| and will schedule them using 1402 |afe|. Pulls control files from the default dev server. 1403 Results will be pulled from |tko| upon completion. 1404 1405 @param name: a value of the SUITE control file variable to search for. 1406 @param builds: the builds on which we're running this suite. It's a 1407 dictionary of version_prefix:build. 1408 @param board: the board on which we're running this suite. 1409 @param devserver: the devserver which contains the build. 1410 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to 1411 using DevServerGetter. 1412 @param **dargs: Any other Suite constructor parameters, as described 1413 in Suite.__init__ docstring. 1414 @return a Suite instance. 1415 """ 1416 if cf_getter is None: 1417 build = suite_common.get_test_source_build(builds, **dargs) 1418 cf_getter = _create_ds_getter(build, devserver) 1419 1420 return cls([suite_common.name_in_tag_predicate(name)], 1421 name, builds, board, cf_getter, **dargs) 1422 1423 1424 def __init__( 1425 self, 1426 predicates, 1427 tag, 1428 builds, 1429 board, 1430 cf_getter, 1431 run_prod_code=False, 1432 afe=None, 1433 tko=None, 1434 pool=None, 1435 results_dir=None, 1436 max_runtime_mins=24*60, 1437 timeout_mins=24*60, 1438 file_bugs=False, 1439 suite_job_id=None, 1440 ignore_deps=False, 1441 extra_deps=None, 1442 priority=priorities.Priority.DEFAULT, 1443 forgiving_parser=True, 1444 wait_for_results=True, 1445 job_retry=False, 1446 max_retries=sys.maxsize, 1447 offload_failures_only=False, 1448 test_source_build=None, 1449 job_keyvals=None, 1450 test_args=None, 1451 child_dependencies=(), 1452 result_reporter=None, 1453 ): 1454 """ 1455 Constructor 1456 1457 @param predicates: A list of callables that accept ControlData 1458 representations of control files. A test will be 1459 included in suite if all callables in this list 1460 return True on the given control file. 1461 @param tag: a string with which to tag jobs run in this suite. 1462 @param builds: the builds on which we're running this suite. 1463 @param board: the board on which we're running this suite. 1464 @param cf_getter: a control_file_getter.ControlFileGetter 1465 @param afe: an instance of AFE as defined in server/frontend.py. 1466 @param tko: an instance of TKO as defined in server/frontend.py. 1467 @param pool: Specify the pool of machines to use for scheduling 1468 purposes. 1469 @param run_prod_code: If true, the suite will run the test code that 1470 lives in prod aka the test code currently on the 1471 lab servers. 1472 @param results_dir: The directory where the job can write results to. 1473 This must be set if you want job_id of sub-jobs 1474 list in the job keyvals. 1475 @param max_runtime_mins: Maximum suite runtime, in minutes. 1476 @param timeout: Maximum job lifetime, in hours. 1477 @param suite_job_id: Job id that will act as parent id to all sub jobs. 1478 Default: None 1479 @param ignore_deps: True if jobs should ignore the DEPENDENCIES 1480 attribute and skip applying of dependency labels. 1481 (Default:False) 1482 @param extra_deps: A list of strings which are the extra DEPENDENCIES 1483 to add to each test being scheduled. 1484 @param priority: Integer priority level. Higher is more important. 1485 @param wait_for_results: Set to False to run the suite job without 1486 waiting for test jobs to finish. Default is 1487 True. 1488 @param job_retry: A bool value indicating whether jobs should be retried 1489 on failure. If True, the field 'JOB_RETRIES' in 1490 control files will be respected. If False, do not 1491 retry. 1492 @param max_retries: Maximum retry limit at suite level. 1493 Regardless how many times each individual test 1494 has been retried, the total number of retries 1495 happening in the suite can't exceed _max_retries. 1496 Default to sys.maxint. 1497 @param offload_failures_only: Only enable gs_offloading for failed 1498 jobs. 1499 @param test_source_build: Build that contains the server-side test code. 1500 @param job_keyvals: General job keyvals to be inserted into keyval file, 1501 which will be used by tko/parse later. 1502 @param test_args: A dict of args passed all the way to each individual 1503 test that will be actually ran. 1504 @param child_dependencies: (optional) list of dependency strings 1505 to be added as dependencies to child jobs. 1506 @param result_reporter: A _ResultReporter instance to report results. If 1507 None, an _EmailReporter will be created. 1508 """ 1509 tests = find_and_parse_tests( 1510 cf_getter, 1511 _ComposedPredicate(predicates), 1512 tag, 1513 forgiving_parser=forgiving_parser, 1514 run_prod_code=run_prod_code, 1515 test_args=test_args, 1516 ) 1517 super(Suite, self).__init__( 1518 tests=tests, 1519 tag=tag, 1520 builds=builds, 1521 board=board, 1522 afe=afe, 1523 tko=tko, 1524 pool=pool, 1525 results_dir=results_dir, 1526 max_runtime_mins=max_runtime_mins, 1527 timeout_mins=timeout_mins, 1528 file_bugs=file_bugs, 1529 suite_job_id=suite_job_id, 1530 ignore_deps=ignore_deps, 1531 extra_deps=extra_deps, 1532 priority=priority, 1533 wait_for_results=wait_for_results, 1534 job_retry=job_retry, 1535 max_retries=max_retries, 1536 offload_failures_only=offload_failures_only, 1537 test_source_build=test_source_build, 1538 job_keyvals=job_keyvals, 1539 child_dependencies=child_dependencies, 1540 result_reporter=result_reporter, 1541 ) 1542 1543 1544class ProvisionSuite(_BaseSuite): 1545 """ 1546 A suite for provisioning DUTs. 1547 1548 This is done by creating dummy_Pass tests. 1549 """ 1550 1551 1552 def __init__( 1553 self, 1554 tag, 1555 builds, 1556 board, 1557 devserver, 1558 num_required, 1559 num_max=float('inf'), 1560 cf_getter=None, 1561 run_prod_code=False, 1562 test_args=None, 1563 test_source_build=None, 1564 **kwargs): 1565 """ 1566 Constructor 1567 1568 @param tag: a string with which to tag jobs run in this suite. 1569 @param builds: the builds on which we're running this suite. 1570 @param board: the board on which we're running this suite. 1571 @param devserver: the devserver which contains the build. 1572 @param num_required: number of tests that must pass. This is 1573 capped by the number of tests that are run. 1574 @param num_max: max number of tests to make. By default there 1575 is no cap, a test is created for each eligible host. 1576 @param cf_getter: a control_file_getter.ControlFileGetter. 1577 @param test_args: A dict of args passed all the way to each individual 1578 test that will be actually ran. 1579 @param test_source_build: Build that contains the server-side test code. 1580 @param kwargs: Various keyword arguments passed to 1581 _BaseSuite constructor. 1582 """ 1583 super(ProvisionSuite, self).__init__( 1584 tests=[], 1585 tag=tag, 1586 builds=builds, 1587 board=board, 1588 **kwargs) 1589 self._num_successful = 0 1590 self._num_required = 0 1591 self.tests = [] 1592 1593 static_deps = [dep for dep in self._dependencies 1594 if not provision.Provision.acts_on(dep)] 1595 if 'pool:suites' in static_deps: 1596 logging.info('Provision suite is disabled on suites pool') 1597 return 1598 logging.debug('Looking for hosts matching %r', static_deps) 1599 hosts = self._afe.get_hosts( 1600 invalid=False, multiple_labels=static_deps) 1601 logging.debug('Found %d matching hosts for ProvisionSuite', len(hosts)) 1602 available_hosts = [h for h in hosts if h.is_available()] 1603 logging.debug('Found %d available hosts for ProvisionSuite', 1604 len(available_hosts)) 1605 dummy_test = _load_dummy_test( 1606 builds, devserver, cf_getter, 1607 run_prod_code, test_args, test_source_build) 1608 self.tests = [dummy_test] * min(len(available_hosts), num_max) 1609 logging.debug('Made %d tests for ProvisionSuite', len(self.tests)) 1610 self._num_required = min(num_required, len(self.tests)) 1611 logging.debug('Expecting %d tests to pass for ProvisionSuite', 1612 self._num_required) 1613 1614 def _handle_result(self, result, record, waiter): 1615 super(ProvisionSuite, self)._handle_result(result, record, waiter) 1616 if result.is_good(): 1617 self._num_successful += 1 1618 1619 def _finished_waiting(self): 1620 return self._num_successful >= self._num_required 1621 1622 1623def _load_dummy_test( 1624 builds, 1625 devserver, 1626 cf_getter=None, 1627 run_prod_code=False, 1628 test_args=None, 1629 test_source_build=None): 1630 """ 1631 Load and return the dummy pass test. 1632 1633 @param builds: the builds on which we're running this suite. 1634 @param devserver: the devserver which contains the build. 1635 @param cf_getter: a control_file_getter.ControlFileGetter. 1636 @param test_args: A dict of args passed all the way to each individual 1637 test that will be actually ran. 1638 @param test_source_build: Build that contains the server-side test code. 1639 """ 1640 if cf_getter is None: 1641 if run_prod_code: 1642 cf_getter = create_fs_getter(_AUTOTEST_DIR) 1643 else: 1644 build = suite_common.get_test_source_build( 1645 builds, test_source_build=test_source_build) 1646 devserver.stage_artifacts(image=build, 1647 artifacts=['control_files']) 1648 cf_getter = _create_ds_getter(build, devserver) 1649 retriever = _ControlFileRetriever(cf_getter, 1650 run_prod_code=run_prod_code, 1651 test_args=test_args) 1652 return retriever.retrieve_for_test('dummy_Pass') 1653 1654 1655class _ComposedPredicate(object): 1656 """Return the composition of the predicates. 1657 1658 Predicates are functions that take a test control data object and 1659 return True of that test is to be included. The returned 1660 predicate's set is the intersection of all of the input predicates' 1661 sets (it returns True if all predicates return True). 1662 """ 1663 1664 def __init__(self, predicates): 1665 """Initialize instance. 1666 1667 @param predicates: Iterable of predicates. 1668 """ 1669 self._predicates = list(predicates) 1670 1671 def __repr__(self): 1672 return '{cls}({this._predicates!r})'.format( 1673 cls=type(self).__name__, 1674 this=self, 1675 ) 1676 1677 def __call__(self, control_data_): 1678 return all(f(control_data_) for f in self._predicates) 1679 1680 1681def _is_nonexistent_board_error(e): 1682 """Return True if error is caused by nonexistent board label. 1683 1684 As of this writing, the particular case we want looks like this: 1685 1686 1) e.problem_keys is a dictionary 1687 2) e.problem_keys['meta_hosts'] exists as the only key 1688 in the dictionary. 1689 3) e.problem_keys['meta_hosts'] matches this pattern: 1690 "Label "board:.*" not found" 1691 1692 We check for conditions 1) and 2) on the 1693 theory that they're relatively immutable. 1694 We don't check condition 3) because it seems 1695 likely to be a maintenance burden, and for the 1696 times when we're wrong, being right shouldn't 1697 matter enough (we _hope_). 1698 1699 @param e: proxy.ValidationError instance 1700 @returns: boolean 1701 """ 1702 return (isinstance(e.problem_keys, dict) 1703 and len(e.problem_keys) == 1 1704 and 'meta_hosts' in e.problem_keys) 1705 1706 1707class _ResultReporter(six.with_metaclass(abc.ABCMeta, object)): 1708 """Abstract base class for reporting test results. 1709 1710 Usually, this is used to report test failures. 1711 """ 1712 1713 @abc.abstractmethod 1714 def report(self, result): 1715 """Report test result. 1716 1717 @param result: Status instance for job. 1718 """ 1719 1720 1721class _EmailReporter(_ResultReporter): 1722 """Class that emails based on test failures.""" 1723 1724 def __init__(self, suite, bug_template=None): 1725 self._suite = suite 1726 self._bug_template = bug_template or {} 1727 1728 def _get_test_bug(self, result): 1729 """Get TestBug for the given result. 1730 1731 @param result: Status instance for a test job. 1732 @returns: TestBug instance. 1733 """ 1734 # reporting modules have dependency on external packages, e.g., httplib2 1735 # Such dependency can cause issue to any module tries to import suite.py 1736 # without building site-packages first. Since the reporting modules are 1737 # only used in this function, move the imports here avoid the 1738 # requirement of building site packages to use other functions in this 1739 # module. 1740 from autotest_lib.server.cros.dynamic_suite import reporting 1741 1742 job_views = self._suite._tko.run('get_detailed_test_views', 1743 afe_job_id=result.id) 1744 return reporting.TestBug(self._suite._job_creator.cros_build, 1745 utils.get_chrome_version(job_views), 1746 self._suite._tag, 1747 result) 1748 1749 def _get_bug_template(self, result): 1750 """Get BugTemplate for test job. 1751 1752 @param result: Status instance for job. 1753 @param bug_template: A template dictionary specifying the default bug 1754 filing options for failures in this suite. 1755 @returns: BugTemplate instance 1756 """ 1757 # reporting modules have dependency on external packages, e.g., httplib2 1758 # Such dependency can cause issue to any module tries to import suite.py 1759 # without building site-packages first. Since the reporting modules are 1760 # only used in this function, move the imports here avoid the 1761 # requirement of building site packages to use other functions in this 1762 # module. 1763 from autotest_lib.server.cros.dynamic_suite import reporting_utils 1764 1765 # Try to merge with bug template in test control file. 1766 template = reporting_utils.BugTemplate(self._bug_template) 1767 try: 1768 test_data = self._suite._jobs_to_tests[result.id] 1769 return template.finalize_bug_template( 1770 test_data.bug_template) 1771 except AttributeError: 1772 # Test control file does not have bug template defined. 1773 return template.bug_template 1774 except reporting_utils.InvalidBugTemplateException as e: 1775 logging.error('Merging bug templates failed with ' 1776 'error: %s An empty bug template will ' 1777 'be used.', e) 1778 return {} 1779 1780 def report(self, result): 1781 # reporting modules have dependency on external 1782 # packages, e.g., httplib2 Such dependency can cause 1783 # issue to any module tries to import suite.py without 1784 # building site-packages first. Since the reporting 1785 # modules are only used in this function, move the 1786 # imports here avoid the requirement of building site 1787 # packages to use other functions in this module. 1788 from autotest_lib.server.cros.dynamic_suite import reporting 1789 1790 reporting.send_email( 1791 self._get_test_bug(result), 1792 self._get_bug_template(result)) 1793