• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Lint as: python2, python3
2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6from __future__ import absolute_import
7from __future__ import division
8from __future__ import print_function
9
10import abc
11import datetime
12import difflib
13import functools
14import hashlib
15import logging
16import operator
17import os
18import re
19import six
20import sys
21import warnings
22
23import common
24
25from autotest_lib.frontend.afe.json_rpc import proxy
26from autotest_lib.client.common_lib import autotest_enum
27from autotest_lib.client.common_lib import error
28from autotest_lib.client.common_lib import global_config
29from autotest_lib.client.common_lib import priorities
30from autotest_lib.client.common_lib import time_utils
31from autotest_lib.client.common_lib import utils
32from autotest_lib.frontend.afe import model_attributes
33from autotest_lib.frontend.afe.json_rpc import proxy
34from autotest_lib.server.cros import provision
35from autotest_lib.server.cros.dynamic_suite import constants
36from autotest_lib.server.cros.dynamic_suite import control_file_getter
37from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
38from autotest_lib.server.cros.dynamic_suite import job_status
39from autotest_lib.server.cros.dynamic_suite import suite_common
40from autotest_lib.server.cros.dynamic_suite import tools
41from autotest_lib.server.cros.dynamic_suite.job_status import Status
42
43try:
44    from autotest_lib.server.cros.dynamic_suite import boolparse_lib
45except ImportError as e:
46    print('Unable to import boolparse_lib: %s' % (e,))
47    print('This script must be either:')
48    print('  - Be run in the chroot.')
49    print('  - (not yet supported) be run after running ')
50    print('    ../utils/build_externals.py')
51
# Suite names singled out for bug filing. NOTE(review): this purpose is
# inferred from the constant's name only; nothing in this part of the file
# reads it -- confirm against its users.
_FILE_BUG_SUITES = [
        'au',
        'bvt',
        'bvt-cq',
        'bvt-inline',
        'paygen_au_beta',
        'paygen_au_canary',
        'paygen_au_dev',
        'paygen_au_stable',
        'sanity',
        'push_to_prod',
]
# Value of the 'drone_installation_directory' option in the SCHEDULER section
# of the global config, looked up once when this module is imported.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER',
        'drone_installation_directory',
)
57
58
class RetryHandler(object):
    """Maintain retry information.

    @var _retry_map: A dictionary that stores retry history.
            The key is afe job id. The value is a dictionary.
            {job_id: {'state':RetryHandler.States, 'retry_max':int}}
            - state:
                The retry state of a job.
                NOT_ATTEMPTED:
                    We haven't done anything about the job.
                ATTEMPTED:
                    We've made an attempt to schedule a retry job. The
                    scheduling may or may not be successful, e.g.
                    it might encounter an rpc error. Note failure
                    in scheduling a retry is different from a retry job failure.
                    For each job, we only attempt to schedule a retry once.
                    For example, assume we have a test with JOB_RETRIES=5 and
                    its second retry job failed. When we attempt to create
                    a third retry job to retry the second, we hit an rpc
                    error. In such case, we will give up on all following
                    retries.
                RETRIED:
                    A retry job has already been successfully
                    scheduled.
            - retry_max:
                The maximum number of times the job can still
                be retried, taking into account retries
                that have occurred.
    @var _retry_level: A retry might be triggered only if the result
            is worse than the level.
    @var _max_retries: Maximum retry limit at suite level.
                     Regardless of how many times each individual test
                     has been retried, the total number of retries happening in
                     the suite can't exceed _max_retries.
    """

    States = autotest_enum.AutotestEnum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                                        start_value=1, step=1)

    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Initialize RetryHandler.

        Only tests with job_retries > 0 get an entry in the retry map.

        @param initial_jobs_to_tests: A dictionary that maps a job id to
                a ControlData object. This dictionary should contain
                jobs that are originally scheduled by the suite.
        @param retry_level: A retry might be triggered only if the result is
                worse than the level.
        @param max_retries: Integer, maximum total retries allowed
                                  for the suite. Default to None, no max.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        # "No max" is modelled as sys.maxsize so that the suite-level budget
        # can always be treated as a plain countdown.
        self._max_retries = (max_retries
                             if max_retries is not None else sys.maxsize)
        for job_id, test in initial_jobs_to_tests.items():
            if test.job_retries > 0:
                self._add_job(new_job_id=job_id,
                              retry_max=test.job_retries)
            else:
                logging.debug("Test %s has no retries", test.name)


    def _add_job(self, new_job_id, retry_max):
        """Add a newly-created job to the retry map.

        @param new_job_id: The afe_job_id of a newly created job.
        @param retry_max: The maximum of times that we could retry
                          the test if the job fails.

        @raises ValueError if new_job_id is already in retry map.

        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        self._retry_map[new_job_id] = {
                'state': self.States.NOT_ATTEMPTED,
                'retry_max': retry_max}


    def _suite_max_reached(self):
        """Return whether maximum retry limit for a suite has been reached."""
        return self._max_retries <= 0


    def add_retry(self, old_job_id, new_job_id):
        """Record a retry.

        Marks the old job as RETRIED, registers the new job with one fewer
        remaining retry, and consumes one retry from the suite-level budget.

        @param old_job_id: The afe_job_id of the job that is retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id isn't in the retry map.
        @raises ValueError if we have already retried or made an attempt
                to retry the old job, or if new_job_id is already in the
                retry map.

        """
        old_record = self._retry_map[old_job_id]
        if old_record['state'] != self.States.NOT_ATTEMPTED:
            # Use %s rather than %d: a non-integer job id must not turn the
            # documented ValueError into a TypeError while the message is
            # being formatted. Integer ids render exactly as before.
            raise ValueError(
                    'We have already retried or attempted to retry job %s' %
                    (old_job_id,))
        old_record['state'] = self.States.RETRIED
        self._add_job(new_job_id=new_job_id,
                      retry_max=old_record['retry_max'] - 1)
        self._max_retries -= 1


    def set_attempted(self, job_id):
        """Set the state of the job to ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id isn't in the retry map.
        @raises ValueError if the current state is not NOT_ATTEMPTED.

        """
        record = self._retry_map[job_id]
        current_state = record['state']
        if current_state != self.States.NOT_ATTEMPTED:
            # We are supposed to retry or attempt to retry each job
            # only once. Raise an error if this is not the case.
            raise ValueError('Unexpected state transition: %s -> %s' %
                             (self.States.get_string(current_state),
                              self.States.get_string(self.States.ATTEMPTED)))
        record['state'] = self.States.ATTEMPTED


    def has_following_retry(self, result):
        """Check whether there will be a following retry.

        We have the following cases for a given job id (result.id),
        - no retry map entry -> retry not required, no following retry
        - has retry map entry:
            - already retried -> has following retry
            - has not retried
                (this branch can be handled by checking should_retry(result))
                - retry_max == 0 --> the last retry job, no more retry
                - retry_max > 0
                   - attempted, but has failed in scheduling a
                     following retry due to rpc error  --> no more retry
                   - has not attempted --> has following retry if test failed.

        @param result: A result, encapsulating the status of the job.

        @returns: True, if there will be a following retry.
                  False otherwise.

        """
        return (result.test_executed
                and result.id in self._retry_map
                and (self._retry_map[result.id]['state'] == self.States.RETRIED
                     or self._should_retry(result)))


    def _should_retry(self, result):
        """Check whether we should retry a job based on its result.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The test was actually executed, meaning that if
           a job was aborted before it could ever reach the state
           of 'Running', the job will not be retried.
        b) The suite-level retry budget has not been used up.
        c) The result is worse than |self._retry_level| which
           defaults to 'WARN'.
        d) The test requires retry, i.e. the job has an entry in the retry map.
        e) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED
           Note that if a test has JOB_RETRIES=5, and the second time
           it was retried it hit an rpc error, we will give up on
           all following retries.
        f) The job has not reached its retry max, i.e. retry_max > 0

        @param result: A result, encapsulating the status of the job.

        @returns: True if we should retry the job.

        """
        return (
            result.test_executed
            and result.id in self._retry_map
            and not self._suite_max_reached()
            and result.is_worse_than(
                job_status.Status(self._retry_level, '', 'reason'))
            and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED
            and self._retry_map[result.id]['retry_max'] > 0
        )

    def _should_retry_local_job(self, job_id):
        """Check whether we should retry a job based on information available
        for a local job without a Result object.

        We will retry the job when all of the following are true.
        a) The suite-level retry budget has not been used up.
        b) The test requires retry, i.e. the job has an entry in the retry map.
        c) We haven't made any retry attempt yet for this job, i.e.
           state == NOT_ATTEMPTED
           If the job is aborted,  we will give up on all following retries,
           regardless of max_retries.
        d) The job has not reached its retry max, i.e. retry_max > 0

        @param job_id: the id for the job, to look up relevant information.

        @returns: True if we should retry the job.

        """
        if self._suite_max_reached():
            logging.debug('suite max_retries reached, not retrying.')
            return False
        record = self._retry_map.get(job_id)
        if record is None:
            logging.debug('job_id not in retry map, not retrying.')
            return False
        if record['state'] != self.States.NOT_ATTEMPTED:
            # Log the state's name, not its numeric enum value, so the
            # message is readable next to the quoted 'Not Attempted'.
            logging.debug("job state was %s not 'Not Attempted', not retrying",
                          self.States.get_string(record['state']))
            return False
        if record['retry_max'] <= 0:
            logging.debug('test-level retries exhausted, not retrying')
            return False
        return True


    def job_present(self, job_id):
        """Check whether a job id is present in the retry map.

        @param job_id: afe_job_id of a job.

        @returns: True if the job is present, False if not.
        """
        # Membership is what is being asked; do not depend on the stored
        # record happening to be truthy.
        return job_id in self._retry_map



    def get_retry_max(self, job_id):
        """Get the maximum times the job can still be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, representing the maximum times the job can still be
                  retried.
        @raises KeyError if job_id isn't in the retry map.

        """
        return self._retry_map[job_id]['retry_max']
304
305
class _SuiteChildJobCreator(object):
    """Schedule the individual test jobs that belong to a suite."""

    def __init__(
            self,
            tag,
            builds,
            board,
            afe=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=(),
            priority=priorities.Priority.DEFAULT,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
    ):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    When falsy, a RetryingAFE is created instead.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        """
        if not afe:
            afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                delay_sec=10,
                                                debug=False)
        self._afe = afe

        # What to run and where.
        self._tag = tag
        self._builds = builds
        self._board = board
        self._test_source_build = test_source_build
        self._suite_job_id = suite_job_id

        # Scheduling knobs shared by every child job.
        self._max_runtime_mins = max_runtime_mins
        self._timeout_mins = timeout_mins
        self._priority = priority
        self._ignore_deps = ignore_deps
        self._extra_deps = tuple(extra_deps)

        # Keyval-related settings.
        self._offload_failures_only = offload_failures_only
        self._job_keyvals = job_keyvals


    @property
    def cros_build(self):
        """Return the CrOS build or the first build in the builds dict."""
        # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
        # sure what the implications of this are, but it's probably not a
        # good thing.
        first_build = list(self._builds.values())[0]
        return self._builds.get(provision.CROS_VERSION_PREFIX, first_build)


    def create_job(self, test, retry_for=None):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A frontend.Job object with an added test_name member.
                  test_name is used to preserve the higher level TEST_NAME
                  name of the job.
        """
        # The suite priority is used as-is, except on moblab where the
        # higher of the suite and test priorities wins.  For a system running
        # multiple suites which share tests, overriding the priority may lead
        # to unexpected scheduling order that adds extra provision jobs.
        if utils.is_moblab():
            job_priority = max(self._priority, test.priority)
        else:
            job_priority = self._priority

        # Tests marked fast never reboot before running and skip reset.
        if test.fast:
            reboot_before = model_attributes.RebootBefore.NEVER
        else:
            reboot_before = None

        job_name = tools.create_job_name(
                self._test_source_build or self.cros_build,
                self._tag,
                test.name)

        job = self._afe.create_job(
            control_file=test.text,
            name=job_name,
            control_type=test.test_type.capitalize(),
            meta_hosts=[self._board]*test.sync_count,
            dependencies=self._create_job_deps(test),
            keyvals=self._create_keyvals_for_test_job(test, retry_for),
            max_runtime_mins=self._max_runtime_mins,
            timeout_mins=self._timeout_mins,
            parent_job_id=self._suite_job_id,
            reboot_before=reboot_before,
            run_reset=not test.fast,
            priority=job_priority,
            synch_count=test.sync_count,
            require_ssp=test.require_ssp)

        job.test_name = test.name
        return job


    def _create_job_deps(self, test):
        """Create job deps list for a test job.

        @param test: ControlData object whose dependencies are used unless
                     this creator was told to ignore them.
        @returns: A list of dependency strings.
        """
        own_deps = [] if self._ignore_deps else list(test.dependencies)
        return own_deps + list(self._extra_deps)


    def _create_keyvals_for_test_job(self, test, retry_for=None):
        """Create keyvals dict for creating a test job.

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A keyvals dict for creating the test job.
        """
        cros_build = self.cros_build
        source_build = self._test_source_build

        keyvals = {}
        keyvals[constants.JOB_BUILD_KEY] = cros_build
        keyvals[constants.JOB_SUITE_KEY] = self._tag
        keyvals[constants.JOB_EXPERIMENTAL_KEY] = test.experimental
        keyvals[constants.JOB_BUILDS_KEY] = self._builds

        # test_source_build is saved to job_keyvals so scheduler can retrieve
        # the build name from database when compiling autoserv commandline.
        # This avoids a database change to add a new field in afe_jobs.
        #
        # Only add `test_source_build` to job keyvals if the build is different
        # from the CrOS build or the job uses more than one build, e.g., both
        # firmware and CrOS will be updated in the dut.
        # This is for backwards compatibility, so the updated Autotest code can
        # compile an autoserv command line to run in a SSP container using
        # previous builds.
        needs_source_build = bool(source_build) and (
                cros_build != source_build or len(self._builds) > 1)
        if needs_source_build:
            keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = source_build
            for prefix, build in six.iteritems(self._builds):
                if prefix == provision.FW_RW_VERSION_PREFIX:
                    keyvals[constants.FWRW_BUILD] = build
                elif prefix == provision.FW_RO_VERSION_PREFIX:
                    keyvals[constants.FWRO_BUILD] = build

        # Add suite job id to keyvals so tko parser can read it from keyval
        # file.
        if self._suite_job_id:
            keyvals[constants.PARENT_JOB_ID] = self._suite_job_id

        # We drop the old job's id in the new job's keyval file so that
        # later our tko parser can figure out the retry relationship and
        # invalidate the results of the old job in tko database.
        if retry_for:
            keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for

        if self._offload_failures_only:
            keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True

        # Copy over only the suite-job keyvals listed as inherited.
        if self._job_keyvals:
            keyvals.update(
                    (key, self._job_keyvals[key])
                    for key in constants.INHERITED_KEYVALS
                    if key in self._job_keyvals)
        return keyvals
486
487
class _ControlFileRetriever(object):
    """Retrieves control files.

    Unlike the control file getters, which hand back raw control file text,
    this class returns parsed control data instances.
    """

    def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False,
                 test_args=None):
        """Initialize instance.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param forgiving_parser: If False, will raise ControlVariableExceptions
                                 if any are encountered when parsing control
                                 files. Note that this can raise an exception
                                 for syntax errors in unrelated files, because
                                 we parse them before applying the predicate.
        @param run_prod_code: If true, the retrieved tests will run the test
                              code that lives in prod aka the test code
                              currently on the lab servers by disabling
                              SSP for the discovered tests.
        @param test_args: A dict of args to be seeded in test control file under
                          the name |args_dict|.
        """
        self._test_args = test_args
        self._run_prod_code = run_prod_code
        self._forgiving_parser = forgiving_parser
        self._cf_getter = cf_getter


    def retrieve_for_test(self, test_name):
        """Retrieve a test's control data.

        forgiving_parser is not consulted here because there is no
        forgiving value that could be returned for a single test.

        @param test_name: Name of test to retrieve.

        @raises ControlVariableException: There is a syntax error in a
                                          control file.

        @returns a ControlData object
        """
        control_data = suite_common.retrieve_control_data_for_test(
                self._cf_getter, test_name)
        return control_data


    def retrieve_for_suite(self, suite_name=''):
        """Scan through all control files and collect their tests.

        @param suite_name: If specified, this method will attempt to restrain
                           the search space to just this suite's control files.

        @raises ControlVariableException: If forgiving_parser is False and there
                                          is a syntax error in a control file.

        @returns a dictionary of ControlData objects selected by the given
                 parameters.
        """
        tests = suite_common.retrieve_for_suite(
                self._cf_getter, suite_name, self._forgiving_parser,
                self._test_args)
        if not self._run_prod_code:
            return tests

        # Running prod code means SSP is switched off for every test found.
        for control_data in six.itervalues(tests):
            control_data.require_ssp = False
        return tests
556
557
def list_all_suites(build, devserver, cf_getter=None):
    """
    Collect every suite name mentioned in the SUITE tag of any control file.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.

    @return list of suites (each name appears once; order is unspecified)
    """
    getter = cf_getter
    if getter is None:
        getter = _create_ds_getter(build, devserver)

    # Accept every control file; only the suite tags are of interest.
    accept_all = lambda t: True
    suite_names = set()
    for control_data in find_and_parse_tests(getter, accept_all):
        suite_names.update(control_data.suite_tag_parts)
    return list(suite_names)
578
579
580def test_file_similarity_predicate(test_file_pattern):
581    """Returns predicate that gets the similarity based on a test's file
582    name pattern.
583
584    Builds a predicate that takes in a parsed control file (a ControlData)
585    and returns a tuple of (file path, ratio), where ratio is the
586    similarity between the test file name and the given test_file_pattern.
587
588    @param test_file_pattern: regular expression (string) to match against
589                              control file names.
590    @return a callable that takes a ControlData and and returns a tuple of
591            (file path, ratio), where ratio is the similarity between the
592            test file name and the given test_file_pattern.
593    """
594    return lambda t: ((None, 0) if not hasattr(t, 'path') else
595            (t.path, difflib.SequenceMatcher(a=t.path,
596                                             b=test_file_pattern).ratio()))
597
598
599def test_name_similarity_predicate(test_name):
600    """Returns predicate that matched based on a test's name.
601
602    Builds a predicate that takes in a parsed control file (a ControlData)
603    and returns a tuple of (test name, ratio), where ratio is the similarity
604    between the test name and the given test_name.
605
606    @param test_name: the test name to base the predicate on.
607    @return a callable that takes a ControlData and returns a tuple of
608            (test name, ratio), where ratio is the similarity between the
609            test name and the given test_name.
610    """
611    return lambda t: ((None, 0) if not hasattr(t, 'name') else
612            (t.name,
613             difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))
614
615
def matches_attribute_expression_predicate(test_attr_boolstr):
    """Returns predicate that matches based on boolean expression of
    attributes.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns the result of evaluating the given attribute boolean
    expression against that test's attributes.

    @param test_attr_boolstr: boolean expression of the attributes to be
                              tested, like 'system:all and interval:daily'.

    @return a callable that takes a ControlData and returns whatever
            boolparse_lib.BoolstrResult yields for its attributes.
    """
    def _attributes_match(control_data):
        return boolparse_lib.BoolstrResult(test_attr_boolstr,
                                           control_data.attributes)

    return _attributes_match
632
633
634def test_file_matches_pattern_predicate(test_file_pattern):
635    """Returns predicate that matches based on a test's file name pattern.
636
637    Builds a predicate that takes in a parsed control file (a ControlData)
638    and returns True if the test's control file name matches the given
639    regular expression.
640
641    @param test_file_pattern: regular expression (string) to match against
642                              control file names.
643    @return a callable that takes a ControlData and and returns
644            True if control file name matches the pattern.
645    """
646    return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,
647                                                     t.path)
648
649
650def test_name_matches_pattern_predicate(test_name_pattern):
651    """Returns predicate that matches based on a test's name pattern.
652
653    Builds a predicate that takes in a parsed control file (a ControlData)
654    and returns True if the test name matches the given regular expression.
655
656    @param test_name_pattern: regular expression (string) to match against
657                              test names.
658    @return a callable that takes a ControlData and returns
659            True if the name fields matches the pattern.
660    """
661    return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,
662                                                     t.name)
663
664
665def test_name_equals_predicate(test_name):
666    """Returns predicate that matched based on a test's name.
667
668    Builds a predicate that takes in a parsed control file (a ControlData)
669    and returns True if the test name is equal to |test_name|.
670
671    @param test_name: the test name to base the predicate on.
672    @return a callable that takes a ControlData and looks for |test_name|
673            in that ControlData's name.
674    """
675    return lambda t: hasattr(t, 'name') and test_name == t.name
676
677
def name_in_tag_similarity_predicate(name):
    """Returns predicate that takes a control file and gets the similarity
    of the suites in the control file and the given name.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns a list of tuples of (suite name, ratio), one per suite
    listed in the control file, where ratio is the difflib similarity
    between that suite and the given name.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and returns a list of tuples
            of (suite name, ratio); when the control file lists no suites
            the list is [(None, 0)].
    """
    def _suite_similarities(control_data):
        scored = []
        for suite in control_data.suite_tag_parts:
            matcher = difflib.SequenceMatcher(a=suite, b=name)
            scored.append((suite, matcher.ratio()))
        # Never hand back an empty list; callers get a zero-score placeholder.
        return scored or [(None, 0)]

    return _suite_similarities
696
697
def name_in_tag_predicate(name):
    """Returns predicate that takes a control file and looks for |name|.

    This is a thin wrapper: the predicate itself is built by
    suite_common.name_in_tag_predicate, and is meant to be True when a
    ControlData's SUITE tag is present and contains |name|.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and looks for |name| in that
            ControlData object's suite member.
    """
    suite_predicate = suite_common.name_in_tag_predicate(name)
    return suite_predicate
709
710
def create_fs_getter(autotest_dir):
    """
    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # The test locations inside an autotest tree are currently hard-coded.
    search_dirs = []
    for subpath in ('server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests'):
        search_dirs.append(os.path.join(autotest_dir, subpath))
    return control_file_getter.FileSystemGetter(search_dirs)
721
722
def _create_ds_getter(build, devserver):
    """Build a control file getter backed by a devserver.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @return a DevServerGetter instance constructed from |build| and
            |devserver|.
    """
    getter = control_file_getter.DevServerGetter(build, devserver)
    return getter
730
731
def _non_experimental_tests_predicate(test_data):
    """Test predicate that accepts only non-experimental tests.

    @param test_data: object exposing an |experimental| attribute.
    @return True if |test_data.experimental| is falsy, False otherwise.
    """
    if test_data.experimental:
        return False
    return True
735
736
def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False, test_args=None):
    """
    Scan through all tests and return the ones accepted by |predicate|.

    Control files are retrieved for |suite_name| through |cf_getter| and
    parsed, then filtered with suite_common.filter_tests. Unless
    |add_experimental| is set, tests whose experimental attribute is set are
    filtered out as well.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.
    @param test_args: A dict of args to be seeded in test control file.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute, as produced by
            suite_common.filter_tests (sorted based on the TIME setting in
            control file, slowest test comes first).
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    parsed_tests = _ControlFileRetriever(
            cf_getter,
            forgiving_parser=forgiving_parser,
            run_prod_code=run_prod_code,
            test_args=test_args,
    ).retrieve_for_suite(suite_name)
    if add_experimental:
        return suite_common.filter_tests(parsed_tests, predicate)
    # Experimental tests are excluded by AND-ing the caller's predicate with
    # the non-experimental check.
    non_experimental_only = _ComposedPredicate(
            [predicate, _non_experimental_tests_predicate])
    return suite_common.filter_tests(parsed_tests, non_experimental_only)
783
784
def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    """
    Scan through all tests and suggest the names closest to a target.

    Control files are retrieved for |suite_name| through |cf_getter|, each
    parsed control file is scored with |predicate|, and the |count| names
    with the highest ratio are returned.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return a tuple of (name, ratio)
           (or a list of such tuples) when run over a ControlData
           representation of a control file. `name` is the key to be
           compared, e.g., a suite name or test name. `ratio` is a value
           between [0,1] indicating the similarity of `name` and the value
           to be compared.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of at most |count| names, sorted by match ratio with the
            best match first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    retriever = _ControlFileRetriever(cf_getter)
    tests = retriever.retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(tests))
    ratio_by_name = {}
    for control_data in six.itervalues(tests):
        scored = predicate(control_data)
        # Some predicates return a single (name, ratio) tuple, others (e.g.
        # name_in_tag_similarity_predicate) a list of them; normalize.
        if not isinstance(scored, list):
            scored = [scored]
        # A name seen again simply overwrites its previous ratio.
        ratio_by_name.update(scored)
    ranked = sorted(ratio_by_name.items(), key=operator.itemgetter(1),
                    reverse=True)
    return [name for name, _ in ranked[:count]]
822
823
def _deprecated_suite_method(func):
    """Decorator for deprecated Suite static methods.

    TODO(ayatane): This is used to decorate functions that are called as
    static methods on Suite.

    @param func: the function to expose as a static method.
    @return a staticmethod wrapping |func|; every call emits a warning
            naming |func| before delegating to it.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        """Wraps |func| for warning."""
        message = ('Calling method "%s" from Suite is deprecated' %
                   func.__name__)
        warnings.warn(message)
        return func(*args, **kwargs)

    deprecated = staticmethod(wrapper)
    return deprecated
837
838
839class _BaseSuite(object):
840    """
841    A suite of tests, defined by some predicate over control file variables.
842
843    Given a place to search for control files a predicate to match the desired
844    tests, can gather tests and fire off jobs to run them, and then wait for
845    results.
846
847    @var _predicate: a function that should return True when run over a
848         ControlData representation of a control file that should be in
849         this Suite.
850    @var _tag: a string with which to tag jobs run in this suite.
851    @var _builds: the builds on which we're running this suite.
852    @var _afe: an instance of AFE as defined in server/frontend.py.
853    @var _tko: an instance of TKO as defined in server/frontend.py.
854    @var _jobs: currently scheduled jobs, if any.
855    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
856                         ControlData objects.
857    @var _retry: a bool value indicating whether jobs should be retried on
858                 failure.
859    @var _retry_handler: a RetryHandler object.
860
861    """
862
863
864    def __init__(
865            self,
866            tests,
867            tag,
868            builds,
869            board,
870            afe=None,
871            tko=None,
872            pool=None,
873            results_dir=None,
874            max_runtime_mins=24*60,
875            timeout_mins=24*60,
876            file_bugs=False,
877            suite_job_id=None,
878            ignore_deps=False,
879            extra_deps=None,
880            priority=priorities.Priority.DEFAULT,
881            wait_for_results=True,
882            job_retry=False,
883            max_retries=sys.maxsize,
884            offload_failures_only=False,
885            test_source_build=None,
886            job_keyvals=None,
887            child_dependencies=(),
888            result_reporter=None,
889    ):
890        """Initialize instance.
891
892        @param tests: Iterable of tests to run.
893        @param tag: a string with which to tag jobs run in this suite.
894        @param builds: the builds on which we're running this suite.
895        @param board: the board on which we're running this suite.
896        @param afe: an instance of AFE as defined in server/frontend.py.
897        @param tko: an instance of TKO as defined in server/frontend.py.
898        @param pool: Specify the pool of machines to use for scheduling
899                purposes.
900        @param results_dir: The directory where the job can write results to.
901                            This must be set if you want job_id of sub-jobs
902                            list in the job keyvals.
903        @param max_runtime_mins: Maximum suite runtime, in minutes.
904        @param timeout: Maximum job lifetime, in hours.
905        @param suite_job_id: Job id that will act as parent id to all sub jobs.
906                             Default: None
907        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
908                            attribute and skip applying of dependency labels.
909                            (Default:False)
910        @param extra_deps: A list of strings which are the extra DEPENDENCIES
911                           to add to each test being scheduled.
912        @param priority: Integer priority level.  Higher is more important.
913        @param wait_for_results: Set to False to run the suite job without
914                                 waiting for test jobs to finish. Default is
915                                 True.
916        @param job_retry: A bool value indicating whether jobs should be retried
917                          on failure. If True, the field 'JOB_RETRIES' in
918                          control files will be respected. If False, do not
919                          retry.
920        @param max_retries: Maximum retry limit at suite level.
921                            Regardless how many times each individual test
922                            has been retried, the total number of retries
923                            happening in the suite can't exceed _max_retries.
924                            Default to sys.maxint.
925        @param offload_failures_only: Only enable gs_offloading for failed
926                                      jobs.
927        @param test_source_build: Build that contains the server-side test code.
928        @param job_keyvals: General job keyvals to be inserted into keyval file,
929                            which will be used by tko/parse later.
930        @param child_dependencies: (optional) list of dependency strings
931                to be added as dependencies to child jobs.
932        @param result_reporter: A _ResultReporter instance to report results. If
933                None, an _EmailReporter will be created.
934        """
935
936        self.tests = list(tests)
937        self._tag = tag
938        self._builds = builds
939        self._results_dir = results_dir
940        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
941                                                         delay_sec=10,
942                                                         debug=False)
943        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
944                                                         delay_sec=10,
945                                                         debug=False)
946        self._jobs = []
947        self._jobs_to_tests = {}
948
949        self._file_bugs = file_bugs
950        self._suite_job_id = suite_job_id
951        self._job_retry=job_retry
952        self._max_retries = max_retries
953        # RetryHandler to be initialized in schedule()
954        self._retry_handler = None
955        self.wait_for_results = wait_for_results
956        self._job_keyvals = job_keyvals
957        if result_reporter is None:
958            self._result_reporter = _EmailReporter(self)
959        else:
960            self._result_reporter = result_reporter
961
962        if extra_deps is None:
963            extra_deps = []
964        extra_deps.append(board)
965        if pool:
966            extra_deps.append(pool)
967        extra_deps.extend(child_dependencies)
968        self._dependencies = tuple(extra_deps)
969
970        self._job_creator = _SuiteChildJobCreator(
971            tag=tag,
972            builds=builds,
973            board=board,
974            afe=afe,
975            max_runtime_mins=max_runtime_mins,
976            timeout_mins=timeout_mins,
977            suite_job_id=suite_job_id,
978            ignore_deps=ignore_deps,
979            extra_deps=extra_deps,
980            priority=priority,
981            offload_failures_only=offload_failures_only,
982            test_source_build=test_source_build,
983            job_keyvals=job_keyvals,
984        )
985
986
987    def _schedule_test(self, record, test, retry_for=None):
988        """Schedule a single test and return the job.
989
990        Schedule a single test by creating a job, and then update relevant
991        data structures that are used to keep track of all running jobs.
992
993        Emits a TEST_NA status log entry if it failed to schedule the test due
994        to NoEligibleHostException or a non-existent board label.
995
996        Returns a frontend.Job object if the test is successfully scheduled.
997        If scheduling failed due to NoEligibleHostException or a non-existent
998        board label, returns None.
999
1000        @param record: A callable to use for logging.
1001                       prototype: record(base_job.status_log_entry)
1002        @param test: ControlData for a test to run.
1003        @param retry_for: If we are scheduling a test to retry an
1004                          old job, the afe_job_id of the old job
1005                          will be passed in as |retry_for|.
1006
1007        @returns: A frontend.Job object or None
1008        """
1009        msg = 'Scheduling %s' % test.name
1010        if retry_for:
1011            msg = msg + ', to retry afe job %d' % retry_for
1012        logging.debug(msg)
1013        begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
1014        try:
1015            job = self._job_creator.create_job(test, retry_for=retry_for)
1016        except (error.NoEligibleHostException, proxy.ValidationError) as e:
1017            if (isinstance(e, error.NoEligibleHostException)
1018                or (isinstance(e, proxy.ValidationError)
1019                    and _is_nonexistent_board_error(e))):
1020                # Treat a dependency on a non-existent board label the same as
1021                # a dependency on a board that exists, but for which there's no
1022                # hardware.
1023                logging.debug('%s not applicable for this board/pool. '
1024                              'Emitting TEST_NA.', test.name)
1025                Status('TEST_NA', test.name,
1026                       'Skipping:  test not supported on this board/pool.',
1027                       begin_time_str=begin_time_str).record_all(record)
1028                return None
1029            else:
1030                raise e
1031        except (error.RPCException, proxy.JSONRPCException):
1032            if retry_for:
1033                # Mark that we've attempted to retry the old job.
1034                logging.debug("RPC exception occurred")
1035                self._retry_handler.set_attempted(job_id=retry_for)
1036            raise
1037        else:
1038            self._jobs.append(job)
1039            self._jobs_to_tests[job.id] = test
1040            if retry_for:
1041                # A retry job was just created, record it.
1042                self._retry_handler.add_retry(
1043                        old_job_id=retry_for, new_job_id=job.id)
1044                retry_count = (test.job_retries -
1045                               self._retry_handler.get_retry_max(job.id))
1046                logging.debug('Job %d created to retry job %d. '
1047                              'Have retried for %d time(s)',
1048                              job.id, retry_for, retry_count)
1049            self._remember_job_keyval(job)
1050            return job
1051
1052    def schedule(self, record):
1053        """
1054        Schedule jobs using |self._afe|.
1055
1056        frontend.Job objects representing each scheduled job will be put in
1057        |self._jobs|.
1058
1059        @param record: A callable to use for logging.
1060                       prototype: record(base_job.status_log_entry)
1061        @returns: The number of tests that were scheduled.
1062        """
1063        scheduled_test_names = []
1064        logging.debug('Discovered %d tests.', len(self.tests))
1065
1066        Status('INFO', 'Start %s' % self._tag).record_result(record)
1067        try:
1068            # Write job_keyvals into keyval file.
1069            if self._job_keyvals:
1070                utils.write_keyval(self._results_dir, self._job_keyvals)
1071
1072            # TODO(crbug.com/730885): This is a hack to protect tests that are
1073            # not usually retried from getting hit by a provision error when run
1074            # as part of a suite. Remove this hack once provision is separated
1075            # out in its own suite.
1076            self._bump_up_test_retries(self.tests)
1077            for test in self.tests:
1078                scheduled_job = self._schedule_test(record, test)
1079                if scheduled_job is not None:
1080                    scheduled_test_names.append(test.name)
1081
1082            # Write the num of scheduled tests and name of them to keyval file.
1083            logging.debug('Scheduled %d tests, writing the total to keyval.',
1084                          len(scheduled_test_names))
1085            utils.write_keyval(
1086                self._results_dir,
1087                self._make_scheduled_tests_keyvals(scheduled_test_names))
1088        except Exception:
1089            logging.exception('Exception while scheduling suite')
1090            Status('FAIL', self._tag,
1091                   'Exception while scheduling suite').record_result(record)
1092
1093        if self._job_retry:
1094            logging.debug("Initializing RetryHandler for suite %s.", self._tag)
1095            self._retry_handler = RetryHandler(
1096                    initial_jobs_to_tests=self._jobs_to_tests,
1097                    max_retries=self._max_retries)
1098            logging.debug("retry map created: %s ",
1099                          self._retry_handler._retry_map)
1100        else:
1101            logging.info("Will not retry jobs from suite %s.", self._tag)
1102        return len(scheduled_test_names)
1103
1104
1105    def _bump_up_test_retries(self, tests):
1106        """Bump up individual test retries to match suite retry options."""
1107        if not self._job_retry:
1108            return
1109
1110        for test in tests:
1111            # We do honor if a test insists on JOB_RETRIES = 0.
1112            if test.job_retries is None:
1113                logging.debug(
1114                        'Test %s did not request retries, but suite requires '
1115                        'retries. Bumping retries up to 1. '
1116                        '(See crbug.com/730885)',
1117                        test.name)
1118                test.job_retries = 1
1119
1120
1121    def _make_scheduled_tests_keyvals(self, scheduled_test_names):
1122        """Make a keyvals dict to write for scheduled test names.
1123
1124        @param scheduled_test_names: A list of scheduled test name strings.
1125
1126        @returns: A keyvals dict.
1127        """
1128        return {
1129            constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names),
1130            constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names),
1131        }
1132
1133
1134    def _should_report(self, result):
1135        """
1136        Returns True if this failure requires to be reported.
1137
1138        @param result: A result, encapsulating the status of the failed job.
1139        @return: True if we should report this failure.
1140        """
1141        return (self._file_bugs and result.test_executed and
1142                not result.is_testna() and
1143                result.is_worse_than(job_status.Status('GOOD', '', 'reason')))
1144
1145
1146    def _has_retry(self, result):
1147        """
1148        Return True if this result gets to retry.
1149
1150        @param result: A result, encapsulating the status of the failed job.
1151        @return: bool
1152        """
1153        return (self._job_retry
1154                and self._retry_handler.has_following_retry(result))
1155
1156
1157    def wait(self, record):
1158        """
1159        Polls for the job statuses, using |record| to print status when each
1160        completes.
1161
1162        @param record: callable that records job status.
1163                 prototype:
1164                   record(base_job.status_log_entry)
1165        """
1166        waiter = job_status.JobResultWaiter(self._afe, self._tko)
1167        try:
1168            if self._suite_job_id:
1169                jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
1170            else:
1171                logging.warning('Unknown suite_job_id, falling back to less '
1172                                'efficient results_generator.')
1173                jobs = self._jobs
1174            waiter.add_jobs(jobs)
1175            for result in waiter.wait_for_results():
1176                self._handle_result(result=result, record=record, waiter=waiter)
1177                if self._finished_waiting():
1178                    break
1179        except Exception:  # pylint: disable=W0703
1180            logging.exception('Exception waiting for results')
1181            Status('FAIL', self._tag,
1182                   'Exception waiting for results').record_result(record)
1183
1184
1185    def _finished_waiting(self):
1186        """Return whether the suite is finished waiting for child jobs."""
1187        return False
1188
1189
1190    def _handle_result(self, result, record, waiter):
1191        """
1192        Handle a test job result.
1193
1194        @param result: Status instance for job.
1195        @param record: callable that records job status.
1196                 prototype:
1197                   record(base_job.status_log_entry)
1198        @param waiter: JobResultsWaiter instance.
1199
1200        @instance_param _result_reporter: _ResultReporter instance.
1201        """
1202        self._record_result(result, record)
1203        rescheduled = False
1204        if self._job_retry and self._retry_handler._should_retry(result):
1205            rescheduled = self._retry_result(result, record, waiter)
1206        # TODO (crbug.com/751428): If the suite times out before a retry could
1207        # finish, we would lose the chance to report errors from the original
1208        # job.
1209        if self._has_retry(result) and rescheduled:
1210             return
1211
1212        if self._should_report(result):
1213            self._result_reporter.report(result)
1214
1215    def _record_result(self, result, record):
1216        """
1217        Record a test job result.
1218
1219        @param result: Status instance for job.
1220        @param record: callable that records job status.
1221                 prototype:
1222                   record(base_job.status_log_entry)
1223        """
1224        result.record_all(record)
1225        self._remember_job_keyval(result)
1226
1227
1228    def _retry_result(self, result, record, waiter):
1229        """
1230        Retry a test job result.
1231
1232        @param result: Status instance for job.
1233        @param record: callable that records job status.
1234                 prototype:
1235                   record(base_job.status_log_entry)
1236        @param waiter: JobResultsWaiter instance.
1237        @returns: True if a job was scheduled for retry, False otherwise.
1238        """
1239        test = self._jobs_to_tests[result.id]
1240        try:
1241            # It only takes effect for CQ retriable job:
1242            #   1) in first try, test.fast=True.
1243            #   2) in second try, test will be run in normal mode, so reset
1244            #       test.fast=False.
1245            test.fast = False
1246            new_job = self._schedule_test(
1247                    record=record, test=test, retry_for=result.id)
1248        except (error.RPCException, proxy.JSONRPCException) as e:
1249            logging.error('Failed to schedule test: %s, Reason: %s',
1250                          test.name, e)
1251            return False
1252        else:
1253            waiter.add_job(new_job)
1254            return bool(new_job)
1255
1256    @property
1257    def jobs(self):
1258        """Give a copy of the associated jobs
1259
1260        @returns: array of jobs"""
1261        return [job for job in self._jobs]
1262
1263
1264    @property
1265    def _should_file_bugs(self):
1266        """Return whether bugs should be filed.
1267
1268        @returns: bool
1269        """
1270        # File bug when failure is one of the _FILE_BUG_SUITES,
1271        # otherwise send an email to the owner anc cc.
1272        return self._tag in _FILE_BUG_SUITES
1273
1274
1275    def abort(self):
1276        """
1277        Abort all scheduled test jobs.
1278        """
1279        if self._jobs:
1280            job_ids = [job.id for job in self._jobs]
1281            self._afe.run('abort_host_queue_entries', job__id__in=job_ids)
1282
1283
1284    def _remember_job_keyval(self, job):
1285        """
1286        Record provided job as a suite job keyval, for later referencing.
1287
1288        @param job: some representation of a job that has the attributes:
1289                    id, test_name, and owner
1290        """
1291        if self._results_dir and job.id and job.owner and job.test_name:
1292            job_id_owner = '%s-%s' % (job.id, job.owner)
1293            logging.debug('Adding job keyval for %s=%s',
1294                          job.test_name, job_id_owner)
1295            utils.write_keyval(
1296                self._results_dir,
1297                {hashlib.md5(job.test_name).hexdigest(): job_id_owner})
1298
1299
1300class Suite(_BaseSuite):
1301    """
1302    A suite of tests, defined by some predicate over control file variables.
1303
1304    Given a place to search for control files a predicate to match the desired
1305    tests, can gather tests and fire off jobs to run them, and then wait for
1306    results.
1307
1308    @var _predicate: a function that should return True when run over a
1309         ControlData representation of a control file that should be in
1310         this Suite.
1311    @var _tag: a string with which to tag jobs run in this suite.
1312    @var _builds: the builds on which we're running this suite.
1313    @var _afe: an instance of AFE as defined in server/frontend.py.
1314    @var _tko: an instance of TKO as defined in server/frontend.py.
1315    @var _jobs: currently scheduled jobs, if any.
1316    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
1317                         ControlData objects.
1318    @var _cf_getter: a control_file_getter.ControlFileGetter
1319    @var _retry: a bool value indicating whether jobs should be retried on
1320                 failure.
1321    @var _retry_handler: a RetryHandler object.
1322
1323    """
1324
    # TODO(ayatane): These methods are kept on the Suite class for
    # backward compatibility.
    # Each name below rebinds a function through _deprecated_suite_method,
    # which turns it into a static method that emits a warning on every call
    # before delegating to the wrapped function. Some of the wrapped
    # functions are referenced through suite_common rather than by a bare
    # name from this module.
    find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests)
    find_possible_tests = _deprecated_suite_method(find_possible_tests)
    create_fs_getter = _deprecated_suite_method(create_fs_getter)
    name_in_tag_predicate = _deprecated_suite_method(
            suite_common.name_in_tag_predicate)
    name_in_tag_similarity_predicate = _deprecated_suite_method(
            name_in_tag_similarity_predicate)
    test_name_equals_predicate = _deprecated_suite_method(
            test_name_equals_predicate)
    test_name_in_list_predicate = _deprecated_suite_method(
            suite_common.test_name_in_list_predicate)
    test_name_matches_pattern_predicate = _deprecated_suite_method(
            test_name_matches_pattern_predicate)
    test_file_matches_pattern_predicate = _deprecated_suite_method(
            test_file_matches_pattern_predicate)
    matches_attribute_expression_predicate = _deprecated_suite_method(
            matches_attribute_expression_predicate)
    test_name_similarity_predicate = _deprecated_suite_method(
            test_name_similarity_predicate)
    test_file_similarity_predicate = _deprecated_suite_method(
            test_file_similarity_predicate)
    list_all_suites = _deprecated_suite_method(list_all_suites)
    get_test_source_build = _deprecated_suite_method(
            suite_common.get_test_source_build)
1351
1352
1353    @classmethod
1354    def create_from_predicates(cls, predicates, builds, board, devserver,
1355                               cf_getter=None, name='ad_hoc_suite',
1356                               run_prod_code=False, **dargs):
1357        """
1358        Create a Suite using a given predicate test filters.
1359
1360        Uses supplied predicate(s) to instantiate a Suite. Looks for tests in
1361        |autotest_dir| and will schedule them using |afe|.  Pulls control files
1362        from the default dev server. Results will be pulled from |tko| upon
1363        completion.
1364
1365        @param predicates: A list of callables that accept ControlData
1366                           representations of control files. A test will be
1367                           included in suite if all callables in this list
1368                           return True on the given control file.
1369        @param builds: the builds on which we're running this suite. It's a
1370                       dictionary of version_prefix:build.
1371        @param board: the board on which we're running this suite.
1372        @param devserver: the devserver which contains the build.
1373        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
1374                          using DevServerGetter.
1375        @param name: name of suite. Defaults to 'ad_hoc_suite'
1376        @param run_prod_code: If true, the suite will run the tests that
1377                              lives in prod aka the test code currently on the
1378                              lab servers.
1379        @param **dargs: Any other Suite constructor parameters, as described
1380                        in Suite.__init__ docstring.
1381        @return a Suite instance.
1382        """
1383        if cf_getter is None:
1384            if run_prod_code:
1385                cf_getter = create_fs_getter(_AUTOTEST_DIR)
1386            else:
1387                build = suite_common.get_test_source_build(builds, **dargs)
1388                cf_getter = _create_ds_getter(build, devserver)
1389
1390        return cls(predicates,
1391                   name, builds, board, cf_getter, run_prod_code, **dargs)
1392
1393
1394    @classmethod
1395    def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
1396                         **dargs):
1397        """
1398        Create a Suite using a predicate based on the SUITE control file var.
1399
1400        Makes a predicate based on |name| and uses it to instantiate a Suite
1401        that looks for tests in |autotest_dir| and will schedule them using
1402        |afe|.  Pulls control files from the default dev server.
1403        Results will be pulled from |tko| upon completion.
1404
1405        @param name: a value of the SUITE control file variable to search for.
1406        @param builds: the builds on which we're running this suite. It's a
1407                       dictionary of version_prefix:build.
1408        @param board: the board on which we're running this suite.
1409        @param devserver: the devserver which contains the build.
1410        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
1411                          using DevServerGetter.
1412        @param **dargs: Any other Suite constructor parameters, as described
1413                        in Suite.__init__ docstring.
1414        @return a Suite instance.
1415        """
1416        if cf_getter is None:
1417            build = suite_common.get_test_source_build(builds, **dargs)
1418            cf_getter = _create_ds_getter(build, devserver)
1419
1420        return cls([suite_common.name_in_tag_predicate(name)],
1421                   name, builds, board, cf_getter, **dargs)
1422
1423
1424    def __init__(
1425            self,
1426            predicates,
1427            tag,
1428            builds,
1429            board,
1430            cf_getter,
1431            run_prod_code=False,
1432            afe=None,
1433            tko=None,
1434            pool=None,
1435            results_dir=None,
1436            max_runtime_mins=24*60,
1437            timeout_mins=24*60,
1438            file_bugs=False,
1439            suite_job_id=None,
1440            ignore_deps=False,
1441            extra_deps=None,
1442            priority=priorities.Priority.DEFAULT,
1443            forgiving_parser=True,
1444            wait_for_results=True,
1445            job_retry=False,
1446            max_retries=sys.maxsize,
1447            offload_failures_only=False,
1448            test_source_build=None,
1449            job_keyvals=None,
1450            test_args=None,
1451            child_dependencies=(),
1452            result_reporter=None,
1453    ):
1454        """
1455        Constructor
1456
1457        @param predicates: A list of callables that accept ControlData
1458                           representations of control files. A test will be
1459                           included in suite if all callables in this list
1460                           return True on the given control file.
1461        @param tag: a string with which to tag jobs run in this suite.
1462        @param builds: the builds on which we're running this suite.
1463        @param board: the board on which we're running this suite.
1464        @param cf_getter: a control_file_getter.ControlFileGetter
1465        @param afe: an instance of AFE as defined in server/frontend.py.
1466        @param tko: an instance of TKO as defined in server/frontend.py.
1467        @param pool: Specify the pool of machines to use for scheduling
1468                purposes.
1469        @param run_prod_code: If true, the suite will run the test code that
1470                              lives in prod aka the test code currently on the
1471                              lab servers.
1472        @param results_dir: The directory where the job can write results to.
1473                            This must be set if you want job_id of sub-jobs
1474                            list in the job keyvals.
1475        @param max_runtime_mins: Maximum suite runtime, in minutes.
1476        @param timeout: Maximum job lifetime, in hours.
1477        @param suite_job_id: Job id that will act as parent id to all sub jobs.
1478                             Default: None
1479        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
1480                            attribute and skip applying of dependency labels.
1481                            (Default:False)
1482        @param extra_deps: A list of strings which are the extra DEPENDENCIES
1483                           to add to each test being scheduled.
1484        @param priority: Integer priority level.  Higher is more important.
1485        @param wait_for_results: Set to False to run the suite job without
1486                                 waiting for test jobs to finish. Default is
1487                                 True.
1488        @param job_retry: A bool value indicating whether jobs should be retried
1489                          on failure. If True, the field 'JOB_RETRIES' in
1490                          control files will be respected. If False, do not
1491                          retry.
1492        @param max_retries: Maximum retry limit at suite level.
1493                            Regardless how many times each individual test
1494                            has been retried, the total number of retries
1495                            happening in the suite can't exceed _max_retries.
1496                            Default to sys.maxint.
1497        @param offload_failures_only: Only enable gs_offloading for failed
1498                                      jobs.
1499        @param test_source_build: Build that contains the server-side test code.
1500        @param job_keyvals: General job keyvals to be inserted into keyval file,
1501                            which will be used by tko/parse later.
1502        @param test_args: A dict of args passed all the way to each individual
1503                          test that will be actually ran.
1504        @param child_dependencies: (optional) list of dependency strings
1505                to be added as dependencies to child jobs.
1506        @param result_reporter: A _ResultReporter instance to report results. If
1507                None, an _EmailReporter will be created.
1508        """
1509        tests = find_and_parse_tests(
1510                cf_getter,
1511                _ComposedPredicate(predicates),
1512                tag,
1513                forgiving_parser=forgiving_parser,
1514                run_prod_code=run_prod_code,
1515                test_args=test_args,
1516        )
1517        super(Suite, self).__init__(
1518                tests=tests,
1519                tag=tag,
1520                builds=builds,
1521                board=board,
1522                afe=afe,
1523                tko=tko,
1524                pool=pool,
1525                results_dir=results_dir,
1526                max_runtime_mins=max_runtime_mins,
1527                timeout_mins=timeout_mins,
1528                file_bugs=file_bugs,
1529                suite_job_id=suite_job_id,
1530                ignore_deps=ignore_deps,
1531                extra_deps=extra_deps,
1532                priority=priority,
1533                wait_for_results=wait_for_results,
1534                job_retry=job_retry,
1535                max_retries=max_retries,
1536                offload_failures_only=offload_failures_only,
1537                test_source_build=test_source_build,
1538                job_keyvals=job_keyvals,
1539                child_dependencies=child_dependencies,
1540                result_reporter=result_reporter,
1541        )
1542
1543
class ProvisionSuite(_BaseSuite):
    """
    A suite for provisioning DUTs.

    Provisioning is triggered by scheduling copies of the dummy_Pass test,
    one per available host that matches the suite's non-provisionable
    dependencies (optionally capped).  The suite is considered done waiting
    once enough of those tests have passed.
    """


    def __init__(
            self,
            tag,
            builds,
            board,
            devserver,
            num_required,
            num_max=float('inf'),
            cf_getter=None,
            run_prod_code=False,
            test_args=None,
            test_source_build=None,
            **kwargs):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param num_required: number of tests that must pass.  This is
                             capped by the number of tests that are run.
        @param num_max: max number of tests to make.  By default there
                        is no cap, a test is created for each eligible host.
        @param cf_getter: a control_file_getter.ControlFileGetter.
        @param run_prod_code: passed to _load_dummy_test() to choose where
                              the dummy test control file is loaded from.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param test_source_build: Build that contains the server-side test code.
        @param kwargs: Various keyword arguments passed to
                       _BaseSuite constructor.
        """
        super(ProvisionSuite, self).__init__(
                tests=[], tag=tag, builds=builds, board=board, **kwargs)
        # Defaults describe an empty suite; they stay in effect when the
        # constructor bails out early below.
        self._num_successful = 0
        self._num_required = 0
        self.tests = []

        # Keep only the dependencies that provisioning does not act on.
        static_deps = []
        for dependency in self._dependencies:
            if provision.Provision.acts_on(dependency):
                continue
            static_deps.append(dependency)

        if 'pool:suites' in static_deps:
            logging.info('Provision suite is disabled on suites pool')
            return

        logging.debug('Looking for hosts matching %r', static_deps)
        matching_hosts = self._afe.get_hosts(
                invalid=False, multiple_labels=static_deps)
        logging.debug('Found %d matching hosts for ProvisionSuite',
                      len(matching_hosts))
        num_available = len(
                [host for host in matching_hosts if host.is_available()])
        logging.debug('Found %d available hosts for ProvisionSuite',
                      num_available)

        dummy_test = _load_dummy_test(
                builds, devserver, cf_getter,
                run_prod_code, test_args, test_source_build)
        # One entry per available host, capped at num_max.  All entries
        # refer to the same loaded test object.
        num_tests = min(num_available, num_max)
        self.tests = [dummy_test] * num_tests
        logging.debug('Made %d tests for ProvisionSuite', len(self.tests))

        # Never require more passes than there are tests.
        self._num_required = min(num_required, len(self.tests))
        logging.debug('Expecting %d tests to pass for ProvisionSuite',
                      self._num_required)

    def _handle_result(self, result, record, waiter):
        """Delegate to the base handler, then count the result if it passed.

        @param result: job result; counted when result.is_good() is true.
        @param record: forwarded unchanged to the base class handler.
        @param waiter: forwarded unchanged to the base class handler.
        """
        super(ProvisionSuite, self)._handle_result(result, record, waiter)
        if not result.is_good():
            return
        self._num_successful += 1

    def _finished_waiting(self):
        """Return True once the required number of tests have passed."""
        return self._num_required <= self._num_successful
1621
1622
def _load_dummy_test(
        builds,
        devserver,
        cf_getter=None,
        run_prod_code=False,
        test_args=None,
        test_source_build=None):
    """
    Load and return the dummy pass test.

    When no control file getter is given, one is chosen here: a filesystem
    getter rooted at _AUTOTEST_DIR if |run_prod_code| is set, otherwise a
    devserver getter for the test source build (after staging that build's
    control files on the devserver).

    @param builds: the builds on which we're running this suite.
    @param devserver: the devserver which contains the build. Only used when
                      a devserver getter has to be created.
    @param cf_getter: a control_file_getter.ControlFileGetter.
    @param run_prod_code: If true and |cf_getter| is None, read control files
                          from _AUTOTEST_DIR instead of the devserver. Also
                          forwarded to the control file retriever.
    @param test_args: A dict of args passed all the way to each individual
                      test that will be actually ran.
    @param test_source_build: Build that contains the server-side test code.
    @return whatever the retriever returns for the 'dummy_Pass' test.
    """
    getter = cf_getter
    if getter is None and run_prod_code:
        getter = create_fs_getter(_AUTOTEST_DIR)
    elif getter is None:
        source_build = suite_common.get_test_source_build(
                builds, test_source_build=test_source_build)
        # The control files must be staged before the getter can serve them.
        devserver.stage_artifacts(image=source_build,
                                  artifacts=['control_files'])
        getter = _create_ds_getter(source_build, devserver)

    dummy_retriever = _ControlFileRetriever(getter,
                                            run_prod_code=run_prod_code,
                                            test_args=test_args)
    return dummy_retriever.retrieve_for_test('dummy_Pass')
1653
1654
1655class _ComposedPredicate(object):
1656    """Return the composition of the predicates.
1657
1658    Predicates are functions that take a test control data object and
1659    return True of that test is to be included.  The returned
1660    predicate's set is the intersection of all of the input predicates'
1661    sets (it returns True if all predicates return True).
1662    """
1663
1664    def __init__(self, predicates):
1665        """Initialize instance.
1666
1667        @param predicates: Iterable of predicates.
1668        """
1669        self._predicates = list(predicates)
1670
1671    def __repr__(self):
1672        return '{cls}({this._predicates!r})'.format(
1673            cls=type(self).__name__,
1674            this=self,
1675        )
1676
1677    def __call__(self, control_data_):
1678        return all(f(control_data_) for f in self._predicates)
1679
1680
1681def _is_nonexistent_board_error(e):
1682    """Return True if error is caused by nonexistent board label.
1683
1684    As of this writing, the particular case we want looks like this:
1685
1686     1) e.problem_keys is a dictionary
1687     2) e.problem_keys['meta_hosts'] exists as the only key
1688        in the dictionary.
1689     3) e.problem_keys['meta_hosts'] matches this pattern:
1690        "Label "board:.*" not found"
1691
1692    We check for conditions 1) and 2) on the
1693    theory that they're relatively immutable.
1694    We don't check condition 3) because it seems
1695    likely to be a maintenance burden, and for the
1696    times when we're wrong, being right shouldn't
1697    matter enough (we _hope_).
1698
1699    @param e: proxy.ValidationError instance
1700    @returns: boolean
1701    """
1702    return (isinstance(e.problem_keys, dict)
1703            and len(e.problem_keys) == 1
1704            and 'meta_hosts' in e.problem_keys)
1705
1706
class _ResultReporter(six.with_metaclass(abc.ABCMeta, object)):
    """Abstract base class for reporting test results.

    Usually, this is used to report test failures.

    The class uses abc.ABCMeta as its metaclass (applied through six so the
    same declaration works on Python 2 and 3), and report() is abstract, so
    a subclass can only be instantiated once it overrides report().
    """

    @abc.abstractmethod
    def report(self, result):
        """Report test result.

        This base implementation does nothing; subclasses supply the
        actual reporting.

        @param result: Status instance for job.
        """
1719
1720
class _EmailReporter(_ResultReporter):
    """Reporter that sends an email for a test result.

    The reporting modules depend on external packages (e.g. httplib2) that
    are only available once site-packages has been built.  They are
    therefore imported inside the methods that need them, so that merely
    importing this module does not require those packages.
    """

    def __init__(self, suite, bug_template=None):
        """Initialize instance.

        @param suite: the suite whose results are reported.  Its _tko,
                      _job_creator, _tag and _jobs_to_tests attributes are
                      read when a result is reported.
        @param bug_template: A template dictionary specifying the default bug
                             filing options for failures in this suite.  Any
                             falsy value is replaced by an empty dict.
        """
        self._suite = suite
        self._bug_template = bug_template if bug_template else {}

    def _get_test_bug(self, result):
        """Get TestBug for the given result.

        @param result: Status instance for a test job.
        @returns: TestBug instance.
        """
        # Imported here rather than at module level; see the class docstring.
        from autotest_lib.server.cros.dynamic_suite import reporting

        suite = self._suite
        job_views = suite._tko.run('get_detailed_test_views',
                                   afe_job_id=result.id)
        cros_build = suite._job_creator.cros_build
        chrome_version = utils.get_chrome_version(job_views)
        return reporting.TestBug(cros_build, chrome_version, suite._tag,
                                 result)

    def _get_bug_template(self, result):
        """Get the bug template to use for a test job.

        The suite-level template is merged with the bug template of the
        test's control file, when the test has one.

        @param result: Status instance for job.
        @returns: the merged template; the suite-level template alone if an
                  AttributeError is raised while merging (the test control
                  file does not define a bug template); an empty dict if
                  the merge is rejected as invalid.
        """
        # Imported here rather than at module level; see the class docstring.
        from autotest_lib.server.cros.dynamic_suite import reporting_utils

        suite_template = reporting_utils.BugTemplate(self._bug_template)
        try:
            control_data = self._suite._jobs_to_tests[result.id]
            merged = suite_template.finalize_bug_template(
                    control_data.bug_template)
        except AttributeError:
            # Test control file does not have bug template defined.
            return suite_template.bug_template
        except reporting_utils.InvalidBugTemplateException as e:
            logging.error('Merging bug templates failed with error: %s '
                          'An empty bug template will be used.', e)
            return {}
        else:
            return merged

    def report(self, result):
        """Email the test bug and bug template built for |result|.

        @param result: Status instance for job.
        """
        # Imported here rather than at module level; see the class docstring.
        from autotest_lib.server.cros.dynamic_suite import reporting

        test_bug = self._get_test_bug(result)
        bug_template = self._get_bug_template(result)
        reporting.send_email(test_bug, bug_template)
1793