• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import abc
6import datetime
7import difflib
8import functools
9import hashlib
10import logging
11import operator
12import os
13import re
14import sys
15import warnings
16
17import common
18
19from autotest_lib.frontend.afe.json_rpc import proxy
20from autotest_lib.client.common_lib import enum
21from autotest_lib.client.common_lib import error
22from autotest_lib.client.common_lib import global_config
23from autotest_lib.client.common_lib import priorities
24from autotest_lib.client.common_lib import time_utils
25from autotest_lib.client.common_lib import utils
26from autotest_lib.frontend.afe import model_attributes
27from autotest_lib.frontend.afe.json_rpc import proxy
28from autotest_lib.server.cros import provision
29from autotest_lib.server.cros.dynamic_suite import constants
30from autotest_lib.server.cros.dynamic_suite import control_file_getter
31from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
32from autotest_lib.server.cros.dynamic_suite import job_status
33from autotest_lib.server.cros.dynamic_suite import suite_common
34from autotest_lib.server.cros.dynamic_suite import tools
35from autotest_lib.server.cros.dynamic_suite.job_status import Status
36
37try:
38    from autotest_lib.server.cros.dynamic_suite import boolparse_lib
39except ImportError as e:
40    print 'Unable to import boolparse_lib: %s' % (e,)
41    print 'This script must be either:'
42    print '  - Be run in the chroot.'
43    print '  - (not yet supported) be run after running '
44    print '    ../utils/build_externals.py'
45
# Suite names, presumably the suites for which bugs get filed (inferred from
# the constant's name; no use of this list is visible in this part of the
# file -- TODO confirm against the code that reads it).
_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta',
                    'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable',
                    'sanity', 'push_to_prod']
# Value of 'drone_installation_directory' in the SCHEDULER section of the
# global config. NOTE: the lookup is performed once, when this module is
# imported.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER', 'drone_installation_directory')
51
52
class RetryHandler(object):
    """Maintain retry information.

    @var _retry_map: A dictionary that stores retry history.
            The key is afe job id. The value is a dictionary.
            {job_id: {'state':RetryHandler.States, 'retry_max':int}}
            - state:
                The retry state of a job.
                NOT_ATTEMPTED:
                    We haven't done anything about the job.
                ATTEMPTED:
                    We've made an attempt to schedule a retry job. The
                    scheduling may or may not be successful, e.g.
                    it might encounter an rpc error. Note failure
                    in scheduling a retry is different from a retry job failure.
                    For each job, we only attempt to schedule a retry once.
                    For example, assume we have a test with JOB_RETRIES=5 and
                    its second retry job failed. When we attempt to create
                    a third retry job to retry the second, we hit an rpc
                    error. In such case, we will give up on all following
                    retries.
                RETRIED:
                    A retry job has already been successfully
                    scheduled.
            - retry_max:
                The maximum number of times the job can still
                be retried, taking into account retries
                that have occurred.
    @var _retry_level: A retry might be triggered only if the result
            is worse than the level.
    @var _max_retries: Remaining retry budget at suite level.
                     Regardless how many times each individual test
                     has been retried, the total number of retries happening in
                     the suite can't exceed the initial value. It is
                     decremented every time a retry is recorded.
    """

    States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                       start_value=1, step=1)

    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Initialize RetryHandler.

        Only jobs whose test declares job_retries > 0 get an entry in the
        retry map; the others are never retried.

        @param initial_jobs_to_tests: A dictionary that maps a job id to
                a ControlData object. This dictionary should contain
                jobs that are originally scheduled by the suite.
        @param retry_level: A retry might be triggered only if the result is
                worse than the level.
        @param max_retries: Integer, maximum total retries allowed
                                  for the suite. Default to None, no max.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        # sys.maxint stands in for "no suite-level limit".
        self._max_retries = (max_retries
                             if max_retries is not None else sys.maxint)
        for job_id, test in initial_jobs_to_tests.items():
            if test.job_retries > 0:
                self._add_job(new_job_id=job_id,
                              retry_max=test.job_retries)
            else:
                logging.debug("Test %s has no retries", test.name)


    def _add_job(self, new_job_id, retry_max):
        """Add a newly-created job to the retry map.

        The job starts in the NOT_ATTEMPTED state.

        @param new_job_id: The afe_job_id of a newly created job.
        @param retry_max: The maximum of times that we could retry
                          the test if the job fails.

        @raises ValueError if new_job_id is already in retry map.

        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        self._retry_map[new_job_id] = {
                'state': self.States.NOT_ATTEMPTED,
                'retry_max': retry_max}


    def _suite_max_reached(self):
        """Return whether maximum retry limit for a suite has been reached."""
        return self._max_retries <= 0


    def add_retry(self, old_job_id, new_job_id):
        """Record a retry.

        Marks the old job as RETRIED, registers the new job with one fewer
        remaining retry, and consumes one unit of the suite-level budget.

        @param old_job_id: The afe_job_id of the job that is retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id isn't in the retry map.
        @raises ValueError if we have already retried or made an attempt
                to retry the old job, or if new_job_id is already in the
                retry map.

        """
        old_record = self._retry_map[old_job_id]
        if old_record['state'] != self.States.NOT_ATTEMPTED:
            # %s rather than %d: a non-integer job id must still produce the
            # documented ValueError instead of a TypeError while formatting.
            raise ValueError(
                    'We have already retried or attempted to retry job %s' %
                    (old_job_id,))
        old_record['state'] = self.States.RETRIED
        self._add_job(new_job_id=new_job_id,
                      retry_max=old_record['retry_max'] - 1)
        self._max_retries -= 1


    def set_attempted(self, job_id):
        """Set the state of the job to ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id isn't in the retry map.
        @raises ValueError if the current state is not NOT_ATTEMPTED.

        """
        record = self._retry_map[job_id]
        current_state = record['state']
        if current_state != self.States.NOT_ATTEMPTED:
            # We are supposed to retry or attempt to retry each job
            # only once. Raise an error if this is not the case.
            raise ValueError('Unexpected state transition: %s -> %s' %
                             (self.States.get_string(current_state),
                              self.States.get_string(self.States.ATTEMPTED)))
        record['state'] = self.States.ATTEMPTED


    def has_following_retry(self, result):
        """Check whether there will be a following retry.

        We have the following cases for a given job id (result.id),
        - no retry map entry -> retry not required, no following retry
        - has retry map entry:
            - already retried -> has following retry
            - has not retried
                (this branch can be handled by checking should_retry(result))
                - retry_max == 0 --> the last retry job, no more retry
                - retry_max > 0
                   - attempted, but has failed in scheduling a
                     following retry due to rpc error  --> no more retry
                   - has not attempted --> has following retry if test failed.

        @param result: A result, encapsulating the status of the job.

        @returns: True, if there will be a following retry.
                  False otherwise.

        """
        return (result.test_executed
                and result.id in self._retry_map
                and (self._retry_map[result.id]['state'] == self.States.RETRIED
                     or self._should_retry(result)))


    def _should_retry(self, result):
        """Check whether we should retry a job based on its result.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The test was actually executed, meaning that if
           a job was aborted before it could ever reach the state
           of 'Running', the job will not be retried.
        b) The suite-level retry budget has not been used up.
        c) The result is worse than |self._retry_level| which
           defaults to 'WARN'.
        d) The test requires retry, i.e. the job has an entry in the retry map.
        e) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED
           Note that if a test has JOB_RETRIES=5, and the second time
           it was retried it hit an rpc error, we will give up on
           all following retries.
        f) The job has not reached its retry max, i.e. retry_max > 0

        @param result: A result, encapsulating the status of the job.

        @returns: True if we should retry the job.

        """
        return (
            result.test_executed
            and result.id in self._retry_map
            and not self._suite_max_reached()
            and result.is_worse_than(
                job_status.Status(self._retry_level, '', 'reason'))
            and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED
            and self._retry_map[result.id]['retry_max'] > 0
        )


    def _should_retry_local_job(self, job_id):
        """Check whether we should retry a job based on information available
        for a local job without a Result object.

        We will retry the job when all of the following are true.
        a) The suite-level retry budget has not been used up.
        b) The test requires retry, i.e. the job has an entry in the retry map.
        c) We haven't made any retry attempt yet for this job, i.e.
           state == NOT_ATTEMPTED
           If the job is aborted,  we will give up on all following retries,
           regardless of max_retries.
        d) The job has not reached its retry max, i.e. retry_max > 0

        The reason for declining a retry is logged at debug level.

        @param job_id: the id for the job, to look up relevant information.

        @returns: True if we should retry the job.

        """
        if self._suite_max_reached():
            logging.debug('suite max_retries reached, not retrying.')
            return False
        if job_id not in self._retry_map:
            logging.debug('job_id not in retry map, not retrying.')
            return False
        record = self._retry_map[job_id]
        if record['state'] != self.States.NOT_ATTEMPTED:
            # Log the state's name, as set_attempted() does, instead of the
            # bare enum integer.
            logging.debug("job state was %s not 'Not Attempted', not retrying",
                          self.States.get_string(record['state']))
            return False
        if record['retry_max'] <= 0:
            logging.debug('test-level retries exhausted, not retrying')
            return False
        return True


    def job_present(self, job_id):
        """Check whether a job id is present in the retry map.

        @param job_id: afe_job_id of a job.

        @returns: True if the job is present, False if not.
        """
        # Membership test instead of relying on the truthiness of the record.
        return job_id in self._retry_map


    def get_retry_max(self, job_id):
        """Get the maximum times the job can still be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, representing the maximum times the job can still be
                  retried.
        @raises KeyError if job_id isn't in the retry map.

        """
        return self._retry_map[job_id]['retry_max']
298
299
class _SuiteChildJobCreator(object):
    """Create test jobs for a suite."""

    def __init__(
            self,
            tag,
            builds,
            board,
            afe=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=(),
            priority=priorities.Priority.DEFAULT,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
    ):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite; a dict
                       keyed by provision version prefix.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    When falsy, a frontend_wrappers.RetryingAFE is created.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        """
        self._tag = tag
        self._builds = builds
        self._board = board
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._max_runtime_mins = max_runtime_mins
        self._timeout_mins = timeout_mins
        self._suite_job_id = suite_job_id
        self._ignore_deps = ignore_deps
        # Stored as a tuple so the caller's sequence is never shared.
        self._extra_deps = tuple(extra_deps)
        self._priority = priority
        self._offload_failures_only = offload_failures_only
        self._test_source_build = test_source_build
        self._job_keyvals = job_keyvals


    @property
    def cros_build(self):
        """Return the CrOS build or the first build in the builds dict.

        @raises IndexError if there is no CrOS build and the builds dict is
                empty.
        """
        # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
        # sure what the implications of this are, but it's probably not a
        # good thing.
        if provision.CROS_VERSION_PREFIX in self._builds:
            return self._builds[provision.CROS_VERSION_PREFIX]
        # Only fall back to "the first build" when it is actually needed.
        # list() keeps the indexing valid when values() returns a view.
        return list(self._builds.values())[0]


    def create_job(self, test, retry_for=None):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A frontend.Job object with an added test_name member.
                  test_name is used to preserve the higher level TEST_NAME
                  name of the job.
        """
        # For a system running multiple suites which share tests, the priority
        # overridden may lead to unexpected scheduling order that adds extra
        # provision jobs. So the test's own priority can only raise the
        # suite priority on moblab.
        test_priority = self._priority
        if utils.is_moblab():
            test_priority = max(self._priority, test.priority)

        # Fast tests never reboot beforehand and skip the reset step; other
        # tests leave reboot_before unset (None).
        reboot_before = (model_attributes.RebootBefore.NEVER if test.fast
                         else None)

        test_obj = self._afe.create_job(
            control_file=test.text,
            name=tools.create_job_name(
                    self._test_source_build or self.cros_build,
                    self._tag,
                    test.name),
            control_type=test.test_type.capitalize(),
            # One meta host entry per machine the test synchronizes across.
            meta_hosts=[self._board]*test.sync_count,
            dependencies=self._create_job_deps(test),
            keyvals=self._create_keyvals_for_test_job(test, retry_for),
            max_runtime_mins=self._max_runtime_mins,
            timeout_mins=self._timeout_mins,
            parent_job_id=self._suite_job_id,
            reboot_before=reboot_before,
            run_reset=not test.fast,
            priority=test_priority,
            synch_count=test.sync_count,
            require_ssp=test.require_ssp)

        test_obj.test_name = test.name
        return test_obj


    def _create_job_deps(self, test):
        """Create job deps list for a test job.

        The test's own dependencies are dropped when ignore_deps was set;
        the suite's extra deps are always appended.

        @param test: ControlData object for a test to run.
        @returns: A new list of dependency strings.
        """
        if self._ignore_deps:
            job_deps = []
        else:
            job_deps = list(test.dependencies)
        job_deps.extend(self._extra_deps)
        return job_deps


    def _create_keyvals_for_test_job(self, test, retry_for=None):
        """Create keyvals dict for creating a test job.

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A keyvals dict for creating the test job.
        """
        keyvals = {
            constants.JOB_BUILD_KEY: self.cros_build,
            constants.JOB_SUITE_KEY: self._tag,
            constants.JOB_EXPERIMENTAL_KEY: test.experimental,
            constants.JOB_BUILDS_KEY: self._builds
        }
        # test_source_build is saved to job_keyvals so scheduler can retrieve
        # the build name from database when compiling autoserv commandline.
        # This avoid a database change to add a new field in afe_jobs.
        #
        # Only add `test_source_build` to job keyvals if the build is different
        # from the CrOS build or the job uses more than one build, e.g., both
        # firmware and CrOS will be updated in the dut.
        # This is for backwards compatibility, so the update Autotest code can
        # compile an autoserv command line to run in a SSP container using
        # previous builds.
        if (self._test_source_build and
            (self.cros_build != self._test_source_build or
             len(self._builds) > 1)):
            keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = \
                    self._test_source_build
            # Look the two firmware prefixes up directly instead of scanning
            # every entry of the builds dict.
            firmware_keyvals = (
                (provision.FW_RW_VERSION_PREFIX, constants.FWRW_BUILD),
                (provision.FW_RO_VERSION_PREFIX, constants.FWRO_BUILD),
            )
            for prefix, keyval_name in firmware_keyvals:
                if prefix in self._builds:
                    keyvals[keyval_name] = self._builds[prefix]
        # Add suite job id to keyvals so tko parser can read it from keyval
        # file.
        if self._suite_job_id:
            keyvals[constants.PARENT_JOB_ID] = self._suite_job_id
        # We drop the old job's id in the new job's keyval file so that
        # later our tko parser can figure out the retry relationship and
        # invalidate the results of the old job in tko database.
        if retry_for:
            keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for
        if self._offload_failures_only:
            keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True
        # Copy over only the suite-level keyvals that are marked as inherited.
        if self._job_keyvals:
            for key in constants.INHERITED_KEYVALS:
                if key in self._job_keyvals:
                    keyvals[key] = self._job_keyvals[key]
        return keyvals
480
481
class _ControlFileRetriever(object):
    """Fetch control files as parsed control data.

    Control file getters hand back the raw text of a control file; this
    class goes through suite_common to return control data instances
    instead.
    """

    def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False,
                 test_args=None):
        """Remember the settings used by the retrieve_* methods.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param forgiving_parser: If False, will raise ControlVariableExceptions
                                 if any are encountered when parsing control
                                 files. Note that this can raise an exception
                                 for syntax errors in unrelated files, because
                                 we parse them before applying the predicate.
        @param run_prod_code: If true, the retrieved tests will run the test
                              code that lives in prod aka the test code
                              currently on the lab servers by disabling
                              SSP for the discovered tests.
        @param test_args: A dict of args to be seeded in test control file under
                          the name |args_dict|.
        """
        self._test_args = test_args
        self._run_prod_code = run_prod_code
        self._forgiving_parser = forgiving_parser
        self._cf_getter = cf_getter


    def retrieve_for_test(self, test_name):
        """Look up the control data of a single test.

        forgiving_parser is not consulted here because there is no
        forgiving value that could be returned for a single test.

        @param test_name: Name of test to retrieve.

        @raises ControlVariableException: There is a syntax error in a
                                          control file.

        @returns a ControlData object
        """
        control_data = suite_common.retrieve_control_data_for_test(
                self._cf_getter, test_name)
        return control_data


    def retrieve_for_suite(self, suite_name=''):
        """Find the control data of all tests, optionally for one suite.

        When run_prod_code was requested, require_ssp is switched off on
        every returned test.

        @param suite_name: If specified, this method will attempt to restrain
                           the search space to just this suite's control files.

        @raises ControlVariableException: If forgiving_parser is False and there
                                          is a syntax error in a control file.

        @returns a dictionary of ControlData objects selected by the given
                 parameters.
        """
        found = suite_common.retrieve_for_suite(
                self._cf_getter, suite_name, self._forgiving_parser,
                self._test_args)
        if not self._run_prod_code:
            return found

        for control_data in found.itervalues():
            control_data.require_ssp = False
        return found
550
551
def list_all_suites(build, devserver, cf_getter=None):
    """
    Collect every suite name mentioned by the available control files.

    All ControlData objects yielded by find_and_parse_tests() (run with a
    predicate that accepts everything) contribute their suite_tag_parts.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      the getter built by _create_ds_getter(build, devserver).

    @return list of suites, without duplicates and in no particular order.
    """
    getter = cf_getter
    if getter is None:
        getter = _create_ds_getter(build, devserver)

    def _accept_all(_control_data):
        return True

    suite_names = set()
    for control_data in find_and_parse_tests(getter, _accept_all):
        suite_names.update(control_data.suite_tag_parts)
    return list(suite_names)
572
573
574def test_file_similarity_predicate(test_file_pattern):
575    """Returns predicate that gets the similarity based on a test's file
576    name pattern.
577
578    Builds a predicate that takes in a parsed control file (a ControlData)
579    and returns a tuple of (file path, ratio), where ratio is the
580    similarity between the test file name and the given test_file_pattern.
581
582    @param test_file_pattern: regular expression (string) to match against
583                              control file names.
584    @return a callable that takes a ControlData and and returns a tuple of
585            (file path, ratio), where ratio is the similarity between the
586            test file name and the given test_file_pattern.
587    """
588    return lambda t: ((None, 0) if not hasattr(t, 'path') else
589            (t.path, difflib.SequenceMatcher(a=t.path,
590                                             b=test_file_pattern).ratio()))
591
592
593def test_name_similarity_predicate(test_name):
594    """Returns predicate that matched based on a test's name.
595
596    Builds a predicate that takes in a parsed control file (a ControlData)
597    and returns a tuple of (test name, ratio), where ratio is the similarity
598    between the test name and the given test_name.
599
600    @param test_name: the test name to base the predicate on.
601    @return a callable that takes a ControlData and returns a tuple of
602            (test name, ratio), where ratio is the similarity between the
603            test name and the given test_name.
604    """
605    return lambda t: ((None, 0) if not hasattr(t, 'name') else
606            (t.name,
607             difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))
608
609
def matches_attribute_expression_predicate(test_attr_boolstr):
    """Returns predicate that evaluates a boolean expression of attributes.

    The predicate takes a parsed control file (a ControlData) and hands its
    attributes, together with |test_attr_boolstr|, to
    boolparse_lib.BoolstrResult; whatever that call returns is the
    predicate's result.

    NOTE: boolparse_lib is imported inside a try/except at module import
    time; if that import failed, calling the predicate raises NameError.

    @param test_attr_boolstr: boolean expression of the attributes to be
                              tested, like 'system:all and interval:daily'.

    @return a callable that takes a ControlData and returns the outcome of
            evaluating the boolean expression against its attributes.
    """
    def _satisfies_expression(control_data):
        return boolparse_lib.BoolstrResult(
            test_attr_boolstr, control_data.attributes)

    return _satisfies_expression
626
627
628def test_file_matches_pattern_predicate(test_file_pattern):
629    """Returns predicate that matches based on a test's file name pattern.
630
631    Builds a predicate that takes in a parsed control file (a ControlData)
632    and returns True if the test's control file name matches the given
633    regular expression.
634
635    @param test_file_pattern: regular expression (string) to match against
636                              control file names.
637    @return a callable that takes a ControlData and and returns
638            True if control file name matches the pattern.
639    """
640    return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,
641                                                     t.path)
642
643
644def test_name_matches_pattern_predicate(test_name_pattern):
645    """Returns predicate that matches based on a test's name pattern.
646
647    Builds a predicate that takes in a parsed control file (a ControlData)
648    and returns True if the test name matches the given regular expression.
649
650    @param test_name_pattern: regular expression (string) to match against
651                              test names.
652    @return a callable that takes a ControlData and returns
653            True if the name fields matches the pattern.
654    """
655    return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,
656                                                     t.name)
657
658
659def test_name_equals_predicate(test_name):
660    """Returns predicate that matched based on a test's name.
661
662    Builds a predicate that takes in a parsed control file (a ControlData)
663    and returns True if the test name is equal to |test_name|.
664
665    @param test_name: the test name to base the predicate on.
666    @return a callable that takes a ControlData and looks for |test_name|
667            in that ControlData's name.
668    """
669    return lambda t: hasattr(t, 'name') and test_name == t.name
670
671
def name_in_tag_similarity_predicate(name):
    """Returns predicate that scores each suite of a control file against
    the given name.

    The predicate takes a parsed control file (a ControlData) and compares
    every entry of its suite_tag_parts with |name| using
    difflib.SequenceMatcher.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and returns a list of tuples
            of (suite name, ratio), one per suite listed in the control
            file, where ratio is the similarity between that suite and the
            given name. A control file listing no suites yields
            [(None, 0)].
    """
    def _suite_scores(control_data):
        scored = []
        for suite in control_data.suite_tag_parts:
            matcher = difflib.SequenceMatcher(a=suite, b=name)
            scored.append((suite, matcher.ratio()))
        if not scored:
            return [(None, 0)]
        return scored

    return _suite_scores
690
691
def name_in_tag_predicate(name):
    """Returns predicate that takes a control file and looks for |name|.

    This simply forwards to suite_common.name_in_tag_predicate and returns
    whatever predicate that function builds.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and looks for |name| in that
            ControlData object's suite member.
    """
    predicate = suite_common.name_in_tag_predicate(name)
    return predicate
703
704
def create_fs_getter(autotest_dir):
    """
    Build a control file getter that reads from the local file system.

    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # Test locations are currently hard-coded, relative to |autotest_dir|.
    search_dirs = []
    for relative_path in ('server/site_tests', 'client/site_tests',
                          'server/tests', 'client/tests'):
        search_dirs.append(os.path.join(autotest_dir, relative_path))
    return control_file_getter.FileSystemGetter(search_dirs)
715
716
def _create_ds_getter(build, devserver):
    """
    Build a control file getter backed by a devserver.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @return a DevServerGetter instance created for |build| and |devserver|.
    """
    getter = control_file_getter.DevServerGetter(build, devserver)
    return getter
724
725
def _non_experimental_tests_predicate(test_data):
    """Test predicate that accepts only non-experimental tests.

    @param test_data: an object exposing an |experimental| attribute
                      (a parsed control file).
    @return False if |test_data.experimental| is truthy, True otherwise.
    """
    if test_data.experimental:
        return False
    return True
729
730
def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False, test_args=None):
    """
    Scan through all control files and return the tests matching |predicate|.

    Control files are retrieved for |suite_name| through a
    _ControlFileRetriever built on |cf_getter|, then filtered with
    suite_common.filter_tests. Unless |add_experimental| is set, the
    predicate is combined with a check that rejects experimental tests.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.
    @param test_args: A dict of args to be seeded in test control file.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return whatever suite_common.filter_tests produces for the retrieved
            tests: a list of ControlData objects that should be run, with
            control file text added in |text| attribute. Results are sorted
            based on the TIME setting in control file, slowest test comes
            first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    retriever_options = {
        'forgiving_parser': forgiving_parser,
        'run_prod_code': run_prod_code,
        'test_args': test_args,
    }
    candidates = _ControlFileRetriever(
            cf_getter, **retriever_options).retrieve_for_suite(suite_name)
    if add_experimental:
        effective_predicate = predicate
    else:
        # Experimental tests are excluded by AND-ing in an extra predicate.
        effective_predicate = _ComposedPredicate(
                [predicate, _non_experimental_tests_predicate])
    return suite_common.filter_tests(candidates, effective_predicate)
777
778
def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    """
    Scan through all control files and suggest the closest matching names.

    Retrieves the control files for |suite_name| through |cf_getter|, scores
    each of them with |predicate| and returns the |count| best-scoring
    names.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that, when run over a ControlData
           representation of a control file, returns either a tuple of
           (name, ratio) or a list of such tuples. `name` is the key to be
           compared, e.g., a suite name or test name. `ratio` is a value
           between [0,1] indicating the similarity of `name` and the value
           to be compared.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of at most |count| names, sorted by match ratio with the
            best match first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    retriever = _ControlFileRetriever(cf_getter)
    tests = retriever.retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(tests))

    ratio_by_name = {}
    for control_data in tests.itervalues():
        scored = predicate(control_data)
        # Some predicates return a single (name, ratio) tuple while others,
        # e.g. name_in_tag_similarity_predicate, return a list of them.
        # Normalize to a list; a name seen again overwrites its old ratio.
        scored_pairs = scored if isinstance(scored, list) else [scored]
        ratio_by_name.update(scored_pairs)

    ranked = sorted(ratio_by_name.items(), key=operator.itemgetter(1),
                    reverse=True)
    return [name for name, _ in ranked[:count]]
816
817
def _deprecated_suite_method(func):
    """Decorator for deprecated Suite static methods.

    TODO(ayatane): This is used to decorate functions that are called as
    static methods on Suite.

    @param func: the function to expose as a deprecated static method.
    @return a staticmethod wrapping |func|; every call emits a warning
            naming |func| and then forwards all arguments to it.
    """
    def warn_then_call(*args, **kwargs):
        """Wraps |func| for warning."""
        warnings.warn(
                'Calling method "%s" from Suite is deprecated' % func.__name__)
        return func(*args, **kwargs)

    # Copy |func|'s metadata (name, docstring, ...) onto the wrapper.
    decorated = functools.wraps(func)(warn_then_call)
    return staticmethod(decorated)
831
832
class _BaseSuite(object):
    """
    A suite of tests that can be scheduled as child jobs and waited on.

    Given an iterable of tests (ControlData objects), can fire off jobs to
    run them, and then wait for results.

    @var tests: list of the tests that make up this suite.
    @var wait_for_results: value of the |wait_for_results| constructor
                           argument.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _builds: the builds on which we're running this suite.
    @var _results_dir: directory keyvals are written to, if any.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
                         ControlData objects.
    @var _job_retry: a bool value indicating whether jobs should be retried
                     on failure.
    @var _max_retries: maximum number of retries at suite level.
    @var _retry_handler: a RetryHandler object; None until schedule() has
                         created it (only when _job_retry is set).
    @var _dependencies: tuple of the extra dependencies, board, pool and
                        child dependencies given to the constructor.

    """


    def __init__(
            self,
            tests,
            tag,
            builds,
            board,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxint,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """Initialize instance.

        @param tests: Iterable of tests to run.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: True if failures should be handed to the result
                          reporter (see _should_report). Default: False
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled. The list is
                           copied; the caller's object is not modified.
        @param priority: Integer priority level.  Higher is more important.
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be retried
                          on failure. If True, the field 'JOB_RETRIES' in
                          control files will be respected. If False, do not
                          retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxint.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """

        self.tests = list(tests)
        self._tag = tag
        self._builds = builds
        self._results_dir = results_dir
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._jobs = []
        self._jobs_to_tests = {}

        self._file_bugs = file_bugs
        self._suite_job_id = suite_job_id
        self._job_retry = job_retry
        self._max_retries = max_retries
        # RetryHandler to be initialized in schedule()
        self._retry_handler = None
        self.wait_for_results = wait_for_results
        self._job_keyvals = job_keyvals
        if result_reporter is None:
            self._result_reporter = _EmailReporter(self)
        else:
            self._result_reporter = result_reporter

        # Work on a private copy so that the list object passed in by the
        # caller is never mutated by the appends below.
        dependencies = [] if extra_deps is None else list(extra_deps)
        dependencies.append(board)
        if pool:
            dependencies.append(pool)
        dependencies.extend(child_dependencies)
        self._dependencies = tuple(dependencies)

        self._job_creator = _SuiteChildJobCreator(
            tag=tag,
            builds=builds,
            board=board,
            afe=afe,
            max_runtime_mins=max_runtime_mins,
            timeout_mins=timeout_mins,
            suite_job_id=suite_job_id,
            ignore_deps=ignore_deps,
            extra_deps=dependencies,
            priority=priority,
            offload_failures_only=offload_failures_only,
            test_source_build=test_source_build,
            job_keyvals=job_keyvals,
        )


    def _schedule_test(self, record, test, retry_for=None):
        """Schedule a single test and return the job.

        Schedule a single test by creating a job, and then update relevant
        data structures that are used to keep track of all running jobs.

        Emits a TEST_NA status log entry if it failed to schedule the test due
        to NoEligibleHostException or a non-existent board label.

        Returns a frontend.Job object if the test is successfully scheduled.
        If scheduling failed due to NoEligibleHostException or a non-existent
        board label, returns None.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @param test: ControlData for a test to run.
        @param retry_for: If we are scheduling a test to retry an
                          old job, the afe_job_id of the old job
                          will be passed in as |retry_for|.

        @raises proxy.ValidationError: if job creation failed validation for
                a reason other than a non-existent board label.
        @raises error.RPCException, proxy.JSONRPCException: re-raised from
                job creation; when |retry_for| is set the old job is first
                marked as attempted in the retry handler.

        @returns: A frontend.Job object or None
        """
        msg = 'Scheduling %s' % test.name
        if retry_for:
            msg = msg + ', to retry afe job %d' % retry_for
        logging.debug(msg)
        begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
        try:
            job = self._job_creator.create_job(test, retry_for=retry_for)
        except (error.NoEligibleHostException, proxy.ValidationError) as e:
            if (isinstance(e, error.NoEligibleHostException)
                or (isinstance(e, proxy.ValidationError)
                    and _is_nonexistent_board_error(e))):
                # Treat a dependency on a non-existent board label the same as
                # a dependency on a board that exists, but for which there's no
                # hardware.
                logging.debug('%s not applicable for this board/pool. '
                              'Emitting TEST_NA.', test.name)
                Status('TEST_NA', test.name,
                       'Skipping:  test not supported on this board/pool.',
                       begin_time_str=begin_time_str).record_all(record)
                return None
            else:
                # Bare raise re-raises the active exception with its
                # original traceback ('raise e' would reset it here).
                raise
        except (error.RPCException, proxy.JSONRPCException):
            if retry_for:
                # Mark that we've attempted to retry the old job.
                logging.debug("RPC exception occurred")
                self._retry_handler.set_attempted(job_id=retry_for)
            raise
        else:
            self._jobs.append(job)
            self._jobs_to_tests[job.id] = test
            if retry_for:
                # A retry job was just created, record it.
                self._retry_handler.add_retry(
                        old_job_id=retry_for, new_job_id=job.id)
                retry_count = (test.job_retries -
                               self._retry_handler.get_retry_max(job.id))
                logging.debug('Job %d created to retry job %d. '
                              'Have retried for %d time(s)',
                              job.id, retry_for, retry_count)
            self._remember_job_keyval(job)
            return job


    def schedule(self, record):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.

        Any exception raised while scheduling or writing keyvals is logged
        and recorded as a FAIL status for the suite; it is not re-raised.
        When |self._job_retry| is set, the RetryHandler is created afterwards
        from the jobs scheduled so far.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @returns: The number of tests that were scheduled.
        """
        scheduled_test_names = []
        logging.debug('Discovered %d tests.', len(self.tests))

        Status('INFO', 'Start %s' % self._tag).record_result(record)
        try:
            # Write job_keyvals into keyval file.
            if self._job_keyvals:
                utils.write_keyval(self._results_dir, self._job_keyvals)

            # TODO(crbug.com/730885): This is a hack to protect tests that are
            # not usually retried from getting hit by a provision error when run
            # as part of a suite. Remove this hack once provision is separated
            # out in its own suite.
            self._bump_up_test_retries(self.tests)
            for test in self.tests:
                scheduled_job = self._schedule_test(record, test)
                if scheduled_job is not None:
                    scheduled_test_names.append(test.name)

            # Write the num of scheduled tests and name of them to keyval file.
            logging.debug('Scheduled %d tests, writing the total to keyval.',
                          len(scheduled_test_names))
            utils.write_keyval(
                self._results_dir,
                self._make_scheduled_tests_keyvals(scheduled_test_names))
        except Exception:
            logging.exception('Exception while scheduling suite')
            Status('FAIL', self._tag,
                   'Exception while scheduling suite').record_result(record)

        if self._job_retry:
            logging.debug("Initializing RetryHandler for suite %s.", self._tag)
            self._retry_handler = RetryHandler(
                    initial_jobs_to_tests=self._jobs_to_tests,
                    max_retries=self._max_retries)
            logging.debug("retry map created: %s ",
                          self._retry_handler._retry_map)
        else:
            logging.info("Will not retry jobs from suite %s.", self._tag)
        return len(scheduled_test_names)


    def _bump_up_test_retries(self, tests):
        """Bump up individual test retries to match suite retry options.

        Does nothing unless |self._job_retry| is set. Tests whose
        |job_retries| is None get it set to 1; any other value, including 0,
        is left untouched.

        @param tests: Iterable of tests (ControlData) to update in place.
        """
        if not self._job_retry:
            return

        for test in tests:
            # We do honor if a test insists on JOB_RETRIES = 0.
            if test.job_retries is None:
                logging.debug(
                        'Test %s did not request retries, but suite requires '
                        'retries. Bumping retries up to 1. '
                        '(See crbug.com/730885)',
                        test.name)
                test.job_retries = 1


    def _make_scheduled_tests_keyvals(self, scheduled_test_names):
        """Make a keyvals dict to write for scheduled test names.

        @param scheduled_test_names: A list of scheduled test name strings.

        @returns: A keyvals dict holding the number of scheduled tests and
                  the repr() of the list of their names.
        """
        return {
            constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names),
            constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names),
        }


    def _should_report(self, result):
        """
        Returns True if this failure requires to be reported.

        A result is reported only when bug filing is enabled for the suite,
        the test was executed, it is not TEST_NA, and its status is worse
        than GOOD.

        @param result: A result, encapsulating the status of the failed job.
        @return: True if we should report this failure.
        """
        return (self._file_bugs and result.test_executed and
                not result.is_testna() and
                result.is_worse_than(job_status.Status('GOOD', '', 'reason')))


    def _has_retry(self, result):
        """
        Return True if this result gets to retry.

        @param result: A result, encapsulating the status of the failed job.
        @return: bool
        """
        return (self._job_retry
                and self._retry_handler.has_following_retry(result))


    def wait(self, record):
        """
        Polls for the job statuses, using |record| to print status when each
        completes.

        Waits on the child jobs of |self._suite_job_id| when it is known,
        otherwise on the jobs scheduled by this instance. Any exception
        raised while waiting is logged and recorded as a FAIL status for the
        suite; it is not re-raised.

        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        waiter = job_status.JobResultWaiter(self._afe, self._tko)
        try:
            if self._suite_job_id:
                jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
            else:
                logging.warning('Unknown suite_job_id, falling back to less '
                                'efficient results_generator.')
                jobs = self._jobs
            waiter.add_jobs(jobs)
            for result in waiter.wait_for_results():
                self._handle_result(result=result, record=record, waiter=waiter)
                if self._finished_waiting():
                    break
        except Exception:  # pylint: disable=W0703
            logging.exception('Exception waiting for results')
            Status('FAIL', self._tag,
                   'Exception waiting for results').record_result(record)


    def _finished_waiting(self):
        """Return whether the suite is finished waiting for child jobs.

        The base implementation always returns False, so wait() only stops
        when the waiter runs out of results.
        """
        return False


    def _handle_result(self, result, record, waiter):
        """
        Handle a test job result.

        Records the result, schedules a retry when the retry handler asks
        for one, and reports the result unless a retry was scheduled for it.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultsWaiter instance.

        @instance_param _result_reporter: _ResultReporter instance.
        """
        self._record_result(result, record)
        rescheduled = False
        if self._job_retry and self._retry_handler._should_retry(result):
            rescheduled = self._retry_result(result, record, waiter)
        # TODO (crbug.com/751428): If the suite times out before a retry could
        # finish, we would lose the chance to report errors from the original
        # job.
        if self._has_retry(result) and rescheduled:
            return

        if self._should_report(result):
            self._result_reporter.report(result)


    def _record_result(self, result, record):
        """
        Record a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        result.record_all(record)
        self._remember_job_keyval(result)


    def _retry_result(self, result, record, waiter):
        """
        Retry a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultsWaiter instance.
        @returns: True if a job was scheduled for retry, False otherwise.
        """
        test = self._jobs_to_tests[result.id]
        try:
            # It only takes effect for CQ retriable job:
            #   1) in first try, test.fast=True.
            #   2) in second try, test will be run in normal mode, so reset
            #       test.fast=False.
            test.fast = False
            new_job = self._schedule_test(
                    record=record, test=test, retry_for=result.id)
        except (error.RPCException, proxy.JSONRPCException) as e:
            logging.error('Failed to schedule test: %s, Reason: %s',
                          test.name, e)
            return False
        else:
            # _schedule_test() returns None when it emitted TEST_NA instead
            # of creating a job; there is nothing to wait on in that case.
            if new_job is not None:
                waiter.add_job(new_job)
            return bool(new_job)


    @property
    def jobs(self):
        """Give a copy of the associated jobs

        @returns: array of jobs"""
        return list(self._jobs)


    @property
    def _should_file_bugs(self):
        """Return whether bugs should be filed.

        @returns: bool
        """
        # File bug when failure is one of the _FILE_BUG_SUITES,
        # otherwise send an email to the owner and cc.
        return self._tag in _FILE_BUG_SUITES


    def abort(self):
        """
        Abort all scheduled test jobs.

        Issues a single 'abort_host_queue_entries' RPC covering every job
        scheduled by this instance; does nothing when no job was scheduled.
        """
        if self._jobs:
            job_ids = [job.id for job in self._jobs]
            self._afe.run('abort_host_queue_entries', job__id__in=job_ids)


    def _remember_job_keyval(self, job):
        """
        Record provided job as a suite job keyval, for later referencing.

        The keyval maps the md5 hex digest of the test name to
        '<job id>-<owner>'. Nothing is written when there is no results
        directory or when any of the job's id, owner or test_name is falsy.

        @param job: some representation of a job that has the attributes:
                    id, test_name, and owner
        """
        if self._results_dir and job.id and job.owner and job.test_name:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            logging.debug('Adding job keyval for %s=%s',
                          job.test_name, job_id_owner)
            utils.write_keyval(
                self._results_dir,
                {hashlib.md5(job.test_name).hexdigest(): job_id_owner})
1292
1293
1294class Suite(_BaseSuite):
1295    """
1296    A suite of tests, defined by some predicate over control file variables.
1297
1298    Given a place to search for control files a predicate to match the desired
1299    tests, can gather tests and fire off jobs to run them, and then wait for
1300    results.
1301
1302    @var _predicate: a function that should return True when run over a
1303         ControlData representation of a control file that should be in
1304         this Suite.
1305    @var _tag: a string with which to tag jobs run in this suite.
1306    @var _builds: the builds on which we're running this suite.
1307    @var _afe: an instance of AFE as defined in server/frontend.py.
1308    @var _tko: an instance of TKO as defined in server/frontend.py.
1309    @var _jobs: currently scheduled jobs, if any.
1310    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
1311                         ControlData objects.
1312    @var _cf_getter: a control_file_getter.ControlFileGetter
1313    @var _retry: a bool value indicating whether jobs should be retried on
1314                 failure.
1315    @var _retry_handler: a RetryHandler object.
1316
1317    """
1318
1319    # TODO(ayatane): These methods are kept on the Suite class for
1320    # backward compatibility.
1321    find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests)
1322    find_possible_tests = _deprecated_suite_method(find_possible_tests)
1323    create_fs_getter = _deprecated_suite_method(create_fs_getter)
1324    name_in_tag_predicate = _deprecated_suite_method(
1325            suite_common.name_in_tag_predicate)
1326    name_in_tag_similarity_predicate = _deprecated_suite_method(
1327            name_in_tag_similarity_predicate)
1328    test_name_equals_predicate = _deprecated_suite_method(
1329            test_name_equals_predicate)
1330    test_name_in_list_predicate = _deprecated_suite_method(
1331            suite_common.test_name_in_list_predicate)
1332    test_name_matches_pattern_predicate = _deprecated_suite_method(
1333            test_name_matches_pattern_predicate)
1334    test_file_matches_pattern_predicate = _deprecated_suite_method(
1335            test_file_matches_pattern_predicate)
1336    matches_attribute_expression_predicate = _deprecated_suite_method(
1337            matches_attribute_expression_predicate)
1338    test_name_similarity_predicate = _deprecated_suite_method(
1339            test_name_similarity_predicate)
1340    test_file_similarity_predicate = _deprecated_suite_method(
1341            test_file_similarity_predicate)
1342    list_all_suites = _deprecated_suite_method(list_all_suites)
1343    get_test_source_build = _deprecated_suite_method(
1344            suite_common.get_test_source_build)
1345
1346
1347    @classmethod
1348    def create_from_predicates(cls, predicates, builds, board, devserver,
1349                               cf_getter=None, name='ad_hoc_suite',
1350                               run_prod_code=False, **dargs):
1351        """
1352        Create a Suite using a given predicate test filters.
1353
1354        Uses supplied predicate(s) to instantiate a Suite. Looks for tests in
1355        |autotest_dir| and will schedule them using |afe|.  Pulls control files
1356        from the default dev server. Results will be pulled from |tko| upon
1357        completion.
1358
1359        @param predicates: A list of callables that accept ControlData
1360                           representations of control files. A test will be
1361                           included in suite if all callables in this list
1362                           return True on the given control file.
1363        @param builds: the builds on which we're running this suite. It's a
1364                       dictionary of version_prefix:build.
1365        @param board: the board on which we're running this suite.
1366        @param devserver: the devserver which contains the build.
1367        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
1368                          using DevServerGetter.
1369        @param name: name of suite. Defaults to 'ad_hoc_suite'
1370        @param run_prod_code: If true, the suite will run the tests that
1371                              lives in prod aka the test code currently on the
1372                              lab servers.
1373        @param **dargs: Any other Suite constructor parameters, as described
1374                        in Suite.__init__ docstring.
1375        @return a Suite instance.
1376        """
1377        if cf_getter is None:
1378            if run_prod_code:
1379                cf_getter = create_fs_getter(_AUTOTEST_DIR)
1380            else:
1381                build = suite_common.get_test_source_build(builds, **dargs)
1382                cf_getter = _create_ds_getter(build, devserver)
1383
1384        return cls(predicates,
1385                   name, builds, board, cf_getter, run_prod_code, **dargs)
1386
1387
1388    @classmethod
1389    def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
1390                         **dargs):
1391        """
1392        Create a Suite using a predicate based on the SUITE control file var.
1393
1394        Makes a predicate based on |name| and uses it to instantiate a Suite
1395        that looks for tests in |autotest_dir| and will schedule them using
1396        |afe|.  Pulls control files from the default dev server.
1397        Results will be pulled from |tko| upon completion.
1398
1399        @param name: a value of the SUITE control file variable to search for.
1400        @param builds: the builds on which we're running this suite. It's a
1401                       dictionary of version_prefix:build.
1402        @param board: the board on which we're running this suite.
1403        @param devserver: the devserver which contains the build.
1404        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
1405                          using DevServerGetter.
1406        @param **dargs: Any other Suite constructor parameters, as described
1407                        in Suite.__init__ docstring.
1408        @return a Suite instance.
1409        """
1410        if cf_getter is None:
1411            build = suite_common.get_test_source_build(builds, **dargs)
1412            cf_getter = _create_ds_getter(build, devserver)
1413
1414        return cls([suite_common.name_in_tag_predicate(name)],
1415                   name, builds, board, cf_getter, **dargs)
1416
1417
1418    def __init__(
1419            self,
1420            predicates,
1421            tag,
1422            builds,
1423            board,
1424            cf_getter,
1425            run_prod_code=False,
1426            afe=None,
1427            tko=None,
1428            pool=None,
1429            results_dir=None,
1430            max_runtime_mins=24*60,
1431            timeout_mins=24*60,
1432            file_bugs=False,
1433            suite_job_id=None,
1434            ignore_deps=False,
1435            extra_deps=None,
1436            priority=priorities.Priority.DEFAULT,
1437            forgiving_parser=True,
1438            wait_for_results=True,
1439            job_retry=False,
1440            max_retries=sys.maxint,
1441            offload_failures_only=False,
1442            test_source_build=None,
1443            job_keyvals=None,
1444            test_args=None,
1445            child_dependencies=(),
1446            result_reporter=None,
1447    ):
1448        """
1449        Constructor
1450
1451        @param predicates: A list of callables that accept ControlData
1452                           representations of control files. A test will be
1453                           included in suite if all callables in this list
1454                           return True on the given control file.
1455        @param tag: a string with which to tag jobs run in this suite.
1456        @param builds: the builds on which we're running this suite.
1457        @param board: the board on which we're running this suite.
1458        @param cf_getter: a control_file_getter.ControlFileGetter
1459        @param afe: an instance of AFE as defined in server/frontend.py.
1460        @param tko: an instance of TKO as defined in server/frontend.py.
1461        @param pool: Specify the pool of machines to use for scheduling
1462                purposes.
1463        @param run_prod_code: If true, the suite will run the test code that
1464                              lives in prod aka the test code currently on the
1465                              lab servers.
1466        @param results_dir: The directory where the job can write results to.
1467                            This must be set if you want job_id of sub-jobs
1468                            list in the job keyvals.
1469        @param max_runtime_mins: Maximum suite runtime, in minutes.
1470        @param timeout: Maximum job lifetime, in hours.
1471        @param suite_job_id: Job id that will act as parent id to all sub jobs.
1472                             Default: None
1473        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
1474                            attribute and skip applying of dependency labels.
1475                            (Default:False)
1476        @param extra_deps: A list of strings which are the extra DEPENDENCIES
1477                           to add to each test being scheduled.
1478        @param priority: Integer priority level.  Higher is more important.
1479        @param wait_for_results: Set to False to run the suite job without
1480                                 waiting for test jobs to finish. Default is
1481                                 True.
1482        @param job_retry: A bool value indicating whether jobs should be retried
1483                          on failure. If True, the field 'JOB_RETRIES' in
1484                          control files will be respected. If False, do not
1485                          retry.
1486        @param max_retries: Maximum retry limit at suite level.
1487                            Regardless how many times each individual test
1488                            has been retried, the total number of retries
1489                            happening in the suite can't exceed _max_retries.
1490                            Default to sys.maxint.
1491        @param offload_failures_only: Only enable gs_offloading for failed
1492                                      jobs.
1493        @param test_source_build: Build that contains the server-side test code.
1494        @param job_keyvals: General job keyvals to be inserted into keyval file,
1495                            which will be used by tko/parse later.
1496        @param test_args: A dict of args passed all the way to each individual
1497                          test that will be actually ran.
1498        @param child_dependencies: (optional) list of dependency strings
1499                to be added as dependencies to child jobs.
1500        @param result_reporter: A _ResultReporter instance to report results. If
1501                None, an _EmailReporter will be created.
1502        """
1503        tests = find_and_parse_tests(
1504                cf_getter,
1505                _ComposedPredicate(predicates),
1506                tag,
1507                forgiving_parser=forgiving_parser,
1508                run_prod_code=run_prod_code,
1509                test_args=test_args,
1510        )
1511        super(Suite, self).__init__(
1512                tests=tests,
1513                tag=tag,
1514                builds=builds,
1515                board=board,
1516                afe=afe,
1517                tko=tko,
1518                pool=pool,
1519                results_dir=results_dir,
1520                max_runtime_mins=max_runtime_mins,
1521                timeout_mins=timeout_mins,
1522                file_bugs=file_bugs,
1523                suite_job_id=suite_job_id,
1524                ignore_deps=ignore_deps,
1525                extra_deps=extra_deps,
1526                priority=priority,
1527                wait_for_results=wait_for_results,
1528                job_retry=job_retry,
1529                max_retries=max_retries,
1530                offload_failures_only=offload_failures_only,
1531                test_source_build=test_source_build,
1532                job_keyvals=job_keyvals,
1533                child_dependencies=child_dependencies,
1534                result_reporter=result_reporter,
1535        )
1536
1537
class ProvisionSuite(_BaseSuite):
    """
    A suite for provisioning DUTs.

    Provisioning is triggered by scheduling one dummy_Pass test per
    available host; the suite is done waiting once enough of them passed.
    """


    def __init__(
            self,
            tag,
            builds,
            board,
            devserver,
            num_required,
            num_max=float('inf'),
            cf_getter=None,
            run_prod_code=False,
            test_args=None,
            test_source_build=None,
            **kwargs):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param num_required: number of tests that must pass.  This is
                             capped by the number of tests that are run.
        @param num_max: max number of tests to make.  By default there
                        is no cap, a test is created for each eligible host.
        @param cf_getter: a control_file_getter.ControlFileGetter.
        @param run_prod_code: Forwarded to _load_dummy_test() when loading
                              the dummy test.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param test_source_build: Build that contains the server-side test code.
        @param kwargs: Various keyword arguments passed to
                       _BaseSuite constructor.
        """
        super(ProvisionSuite, self).__init__(
                tests=[], tag=tag, builds=builds, board=board, **kwargs)
        # Start out as an empty suite that requires nothing; this is also
        # the final state when provisioning is disabled below.
        self._num_successful = 0
        self._num_required = 0
        self.tests = []

        # Only labels that provisioning does not act on are used to find
        # candidate hosts.
        host_labels = []
        for dependency in self._dependencies:
            if not provision.Provision.acts_on(dependency):
                host_labels.append(dependency)
        if 'pool:suites' in host_labels:
            logging.info('Provision suite is disabled on suites pool')
            return

        logging.debug('Looking for hosts matching %r', host_labels)
        matching_hosts = self._afe.get_hosts(
                invalid=False, multiple_labels=host_labels)
        logging.debug('Found %d matching hosts for ProvisionSuite',
                      len(matching_hosts))
        num_available = len(
                [host for host in matching_hosts if host.is_available()])
        logging.debug('Found %d available hosts for ProvisionSuite',
                      num_available)

        dummy_test = _load_dummy_test(
                builds, devserver, cf_getter,
                run_prod_code, test_args, test_source_build)
        # One copy of the dummy test per available host, capped by num_max.
        num_tests = min(num_available, num_max)
        self.tests = [dummy_test] * num_tests
        logging.debug('Made %d tests for ProvisionSuite', len(self.tests))
        # Never require more passes than there are tests to run.
        self._num_required = min(num_required, len(self.tests))
        logging.debug('Expecting %d tests to pass for ProvisionSuite',
                      self._num_required)

    def _handle_result(self, result, record, waiter):
        """Run the parent handling, then count the result if it is good."""
        super(ProvisionSuite, self)._handle_result(result, record, waiter)
        if not result.is_good():
            return
        self._num_successful += 1

    def _finished_waiting(self):
        """Return True once the required number of tests has passed."""
        return self._num_required <= self._num_successful
1615
1616
def _load_dummy_test(
        builds,
        devserver,
        cf_getter=None,
        run_prod_code=False,
        test_args=None,
        test_source_build=None):
    """
    Load and return the dummy pass test.

    If no |cf_getter| is given, one is created: a file system getter rooted
    at _AUTOTEST_DIR when |run_prod_code| is true, otherwise a devserver
    getter for the test source build (whose control files are staged first).

    @param builds: the builds on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: a control_file_getter.ControlFileGetter.
    @param run_prod_code: selects the default getter (see above) and is
                          forwarded to the control file retriever.
    @param test_args: A dict of args passed all the way to each individual
                      test that will be actually ran.
    @param test_source_build: Build that contains the server-side test code.
    @return whatever _ControlFileRetriever.retrieve_for_test() returns for
            'dummy_Pass'.
    """
    if cf_getter is None and run_prod_code:
        cf_getter = create_fs_getter(_AUTOTEST_DIR)
    elif cf_getter is None:
        source_build = suite_common.get_test_source_build(
                builds, test_source_build=test_source_build)
        # The control files have to be staged on the devserver before the
        # getter can serve them.
        devserver.stage_artifacts(image=source_build,
                                  artifacts=['control_files'])
        cf_getter = _create_ds_getter(source_build, devserver)

    dummy_retriever = _ControlFileRetriever(
            cf_getter, run_prod_code=run_prod_code, test_args=test_args)
    return dummy_retriever.retrieve_for_test('dummy_Pass')
1647
1648
class _ComposedPredicate(object):
    """Callable conjunction of several test predicates.

    Predicates are functions that take a test control data object and
    return True if that test is to be included.  An instance accepts a
    test only when every wrapped predicate accepts it, i.e. its set of
    accepted tests is the intersection of the predicates' sets.  With no
    predicates at all, every test is accepted.
    """

    def __init__(self, predicates):
        """Initialize instance.

        @param predicates: Iterable of predicates.
        """
        # Snapshot the iterable so the instance can be called repeatedly.
        self._predicates = list(predicates)

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self._predicates)

    def __call__(self, control_data_):
        # Stop at the first rejecting predicate; later ones are not called.
        for predicate in self._predicates:
            if not predicate(control_data_):
                return False
        return True
1673
1674
def _is_nonexistent_board_error(e):
    """Return True if error is caused by nonexistent board label.

    As of this writing, the error we are looking for has these traits:

     1) e.problem_keys is a dictionary
     2) 'meta_hosts' is the only key in that dictionary
     3) e.problem_keys['meta_hosts'] matches this pattern:
        "Label "board:.*" not found"

    Only 1) and 2) are checked, on the theory that they're relatively
    immutable.  Checking 3) seems likely to be a maintenance burden, and
    for the times when we're wrong, being right shouldn't matter enough
    (we _hope_).

    @param e: proxy.ValidationError instance
    @returns: boolean
    """
    problem_keys = e.problem_keys
    if not isinstance(problem_keys, dict):
        return False
    if len(problem_keys) != 1:
        return False
    return 'meta_hosts' in problem_keys
1699
1700
class _ResultReporter(object):
    """Abstract base class for reporting test results.

    Usually, this is used to report test failures.  Concrete reporters
    provide the behavior by overriding report().
    """

    # Python 2 spelling of the metaclass declaration; it is what makes the
    # abc.abstractmethod marker on report() take effect.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def report(self, result):
        """Report test result.

        The base implementation does nothing.

        @param result: Status instance for job.
        """
1715
1716
class _EmailReporter(_ResultReporter):
    """Class that emails based on test failures."""

    def __init__(self, suite, bug_template=None):
        """Initialize instance.

        @param suite: the suite whose job results will be reported.
        @param bug_template: A template dictionary specifying the default bug
                             filing options for failures in this suite.  An
                             empty dict is used when none is given.
        """
        self._suite = suite
        self._bug_template = bug_template if bug_template else {}

    def _get_test_bug(self, result):
        """Get TestBug for the given result.

        @param result: Status instance for a test job.
        @returns: TestBug instance.
        """
        # Imported lazily: the reporting modules depend on external packages
        # (e.g. httplib2), and importing them at module level would force
        # every user of suite.py to build site-packages first.
        from autotest_lib.server.cros.dynamic_suite import reporting

        suite = self._suite
        job_views = suite._tko.run('get_detailed_test_views',
                                   afe_job_id=result.id)
        cros_build = suite._job_creator.cros_build
        chrome_version = utils.get_chrome_version(job_views)
        return reporting.TestBug(cros_build, chrome_version, suite._tag,
                                 result)

    def _get_bug_template(self, result):
        """Get the bug template to use for a test job.

        The suite level template given to the constructor is merged with
        the bug template of the test's control file, if it has one.

        @param result: Status instance for job.
        @returns: the merged bug template; the suite level template alone
                  if an AttributeError occurs while merging (e.g. the
                  control file defines no bug template); an empty dict if
                  the templates cannot be merged.
        """
        # Imported lazily: the reporting modules depend on external packages
        # (e.g. httplib2), and importing them at module level would force
        # every user of suite.py to build site-packages first.
        from autotest_lib.server.cros.dynamic_suite import reporting_utils

        suite_template = reporting_utils.BugTemplate(self._bug_template)
        try:
            # Try to merge with bug template in test control file.
            control_data = self._suite._jobs_to_tests[result.id]
            merged = suite_template.finalize_bug_template(
                    control_data.bug_template)
        except AttributeError:
            # Test control file does not have bug template defined.
            return suite_template.bug_template
        except reporting_utils.InvalidBugTemplateException as merge_error:
            logging.error('Merging bug templates failed with error: %s '
                          'An empty bug template will be used.',
                          merge_error)
            return {}
        else:
            return merged

    def report(self, result):
        """Email the bug and bug template built for the given result.

        @param result: Status instance for job.
        """
        # Imported lazily: the reporting modules depend on external packages
        # (e.g. httplib2), and importing them at module level would force
        # every user of suite.py to build site-packages first.
        from autotest_lib.server.cros.dynamic_suite import reporting

        test_bug = self._get_test_bug(result)
        bug_template = self._get_bug_template(result)
        reporting.send_email(test_bug, bug_template)
1789