# Copyright (C) 2010 Google Inc. All rights reserved.
# Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import logging
import signal
import time

from webkitpy.layout_tests.models import test_expectations
from webkitpy.layout_tests.models import test_failures


_log = logging.getLogger(__name__)

OK_EXIT_STATUS = 0

# This matches what the shell does on POSIX.
INTERRUPTED_EXIT_STATUS = signal.SIGINT + 128

# POSIX limits status codes to 0-255. Normally run-webkit-tests returns the number
# of tests that failed. These indicate exceptional conditions triggered by the
# script itself, so we count backwards from 255 (aka -1) to enumerate them.
SYS_DEPS_EXIT_STATUS = 252
NO_TESTS_EXIT_STATUS = 253
NO_DEVICES_EXIT_STATUS = 254
UNEXPECTED_ERROR_EXIT_STATUS = 255

ERROR_CODES = (
    INTERRUPTED_EXIT_STATUS,
    SYS_DEPS_EXIT_STATUS,
    NO_TESTS_EXIT_STATUS,
    NO_DEVICES_EXIT_STATUS,
    UNEXPECTED_ERROR_EXIT_STATUS,
)


class TestRunException(Exception):
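    """Exception carrying an exit code and a message for a test run that must be aborted."""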
    def __init__(self, code, msg):
        self.code = code
        self.msg = msg


class TestRunResults(object):
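    """Aggregates the results of a single run: per-test results, counts of
    expected and unexpected outcomes, failures, slow tests, and interruption state."""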
    def __init__(self, expectations, num_tests):
        self.total = num_tests
        self.remaining = self.total
        self.expectations = expectations
        self.expected = 0
        self.expected_failures = 0
        self.unexpected = 0
        self.unexpected_failures = 0
        self.unexpected_crashes = 0
        self.unexpected_timeouts = 0
        self.tests_by_expectation = {}
        self.tests_by_timeline = {}
        self.results_by_name = {}  # Map of test name to the last result for the test.
        self.all_results = []  # All results from a run, including every iteration of every test.
        self.unexpected_results_by_name = {}
        self.failures_by_name = {}
        self.total_failures = 0
        self.expected_skips = 0
        for expectation in test_expectations.TestExpectations.EXPECTATIONS.values():
            self.tests_by_expectation[expectation] = set()
        for timeline in test_expectations.TestExpectations.TIMELINES.values():
            self.tests_by_timeline[timeline] = expectations.get_tests_with_timeline(timeline)
        self.slow_tests = set()
        self.interrupted = False
        self.keyboard_interrupted = False
        self.run_time = 0  # The wall clock time spent running the tests (layout_test_runner.run()).

    def add(self, test_result, expected, test_is_slow):
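        """Records a single TestResult, updating the expected/unexpected counters
        and the per-test-name maps; skipped results are not added to all_results."""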
        result_type_for_stats = test_result.type
        if test_expectations.WONTFIX in self.expectations.model().get_expectations(test_result.test_name):
            result_type_for_stats = test_expectations.WONTFIX
        self.tests_by_expectation[result_type_for_stats].add(test_result.test_name)

        self.results_by_name[test_result.test_name] = test_result
        if test_result.type != test_expectations.SKIP:
            self.all_results.append(test_result)
        self.remaining -= 1
        if test_result.failures:
            self.total_failures += 1
            self.failures_by_name[test_result.test_name] = test_result.failures
        if expected:
            self.expected += 1
            if test_result.type == test_expectations.SKIP:
                self.expected_skips += 1
            elif test_result.type != test_expectations.PASS:
                self.expected_failures += 1
        else:
            self.unexpected_results_by_name[test_result.test_name] = test_result
            self.unexpected += 1
            if test_result.failures:
                self.unexpected_failures += 1
            if test_result.type == test_expectations.CRASH:
                self.unexpected_crashes += 1
            elif test_result.type == test_expectations.TIMEOUT:
                self.unexpected_timeouts += 1
        if test_is_slow:
            self.slow_tests.add(test_result.test_name)


class RunDetails(object):
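    """Bundles everything produced by a run: the exit code, the summarized JSON
    results, and the raw initial and retry TestRunResults."""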
    def __init__(self, exit_code, summarized_full_results=None, summarized_failing_results=None, initial_results=None, retry_results=None, enabled_pixel_tests_in_retry=False):
        self.exit_code = exit_code
        self.summarized_full_results = summarized_full_results
        self.summarized_failing_results = summarized_failing_results
        self.initial_results = initial_results
        self.retry_results = retry_results
        self.enabled_pixel_tests_in_retry = enabled_pixel_tests_in_retry


def _interpret_test_failures(failures):
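    """Maps TestFailure types to the 'is_missing_*' flags used in the results JSON."""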
    test_dict = {}
    failure_types = [type(failure) for failure in failures]
    # FIXME: get rid of all these is_* values once there is a 1:1 map between
    # TestFailure type and test_expectations.EXPECTATION.
    if test_failures.FailureMissingAudio in failure_types:
        test_dict['is_missing_audio'] = True

    if test_failures.FailureMissingResult in failure_types:
        test_dict['is_missing_text'] = True

    if test_failures.FailureMissingImage in failure_types or test_failures.FailureMissingImageHash in failure_types:
        test_dict['is_missing_image'] = True

    return test_dict


def summarize_results(port_obj, expectations, initial_results, retry_results, enabled_pixel_tests_in_retry, only_include_failing=False):
    """Returns a dictionary containing a summary of the test runs, with the following fields:
        'version': a version indicator
        'fixable': the number of fixable tests (NOW - PASS)
        'skipped': the number of skipped tests (NOW & SKIPPED)
        'num_regressions': the number of non-flaky failures
        'num_flaky': the number of flaky failures
        'num_passes': the number of unexpected passes
        'tests': a dict of tests -> {'expected': '...', 'actual': '...'}
    """
    results = {}
    results['version'] = 3

    tbe = initial_results.tests_by_expectation
    tbt = initial_results.tests_by_timeline
    results['fixable'] = len(tbt[test_expectations.NOW] - tbe[test_expectations.PASS])
    # FIXME: Remove this. It is redundant with results['num_failures_by_type'].
    results['skipped'] = len(tbt[test_expectations.NOW] & tbe[test_expectations.SKIP])

    num_passes = 0
    num_flaky = 0
    num_regressions = 0
    keywords = {}
    for expectation_string, expectation_enum in test_expectations.TestExpectations.EXPECTATIONS.iteritems():
        keywords[expectation_enum] = expectation_string.upper()

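    # Count results per expectation type; everything except WONTFIX is restricted
    # to tests in the NOW timeline.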
    num_failures_by_type = {}
    for expectation in initial_results.tests_by_expectation:
        tests = initial_results.tests_by_expectation[expectation]
        if expectation != test_expectations.WONTFIX:
            tests &= tbt[test_expectations.NOW]
        num_failures_by_type[keywords[expectation]] = len(tests)
    # The number of failures by type.
    results['num_failures_by_type'] = num_failures_by_type

    tests = {}

    for test_name, result in initial_results.results_by_name.iteritems():
        expected = expectations.get_expectations_string(test_name)
        result_type = result.type
        actual = [keywords[result_type]]

        if only_include_failing and result.type == test_expectations.SKIP:
            continue

        if result_type == test_expectations.PASS:
            num_passes += 1
            if not result.has_stderr and only_include_failing:
                continue
        elif result_type != test_expectations.SKIP and test_name in initial_results.unexpected_results_by_name:
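            # An unexpected failure that matched one of its expectations on retry is
            # treated as flaky; otherwise (or if there was no retry) it counts as a
            # regression.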
            if retry_results and test_name not in retry_results.unexpected_results_by_name:
                actual.extend(expectations.get_expectations_string(test_name).split(" "))
                num_flaky += 1
            elif retry_results:
                retry_result_type = retry_results.unexpected_results_by_name[test_name].type
                num_regressions += 1
                if keywords[retry_result_type] not in actual:
                    actual.append(keywords[retry_result_type])
            else:
                num_regressions += 1

        test_dict = {}

        rounded_run_time = round(result.test_run_time, 1)
        if rounded_run_time:
            test_dict['time'] = rounded_run_time

        if result.has_stderr:
            test_dict['has_stderr'] = True

        bugs = expectations.model().get_expectation_line(test_name).bugs
        if bugs:
            test_dict['bugs'] = bugs

        if result.reftest_type:
            test_dict.update(reftest_type=list(result.reftest_type))

        test_dict['expected'] = expected
        test_dict['actual'] = " ".join(actual)

        def is_expected(actual_result):
            return expectations.matches_an_expected_result(test_name, result_type, port_obj.get_option('pixel_tests') or result.reftest_type)

        # To avoid bloating the output results json too much, only add an entry for whether the failure is unexpected.
        if not all(is_expected(actual_result) for actual_result in actual):
            test_dict['is_unexpected'] = True

        test_dict.update(_interpret_test_failures(result.failures))

        if retry_results:
            retry_result = retry_results.unexpected_results_by_name.get(test_name)
            if retry_result:
                test_dict.update(_interpret_test_failures(retry_result.failures))

        # Store test hierarchically by directory. e.g.
        # foo/bar/baz.html: test_dict
        # foo/bar/baz1.html: test_dict
        #
        # becomes
        # foo: {
        #     bar: {
        #         baz.html: test_dict,
        #         baz1.html: test_dict
        #     }
        # }
        parts = test_name.split('/')
        current_map = tests
        for i, part in enumerate(parts):
            if i == (len(parts) - 1):
                current_map[part] = test_dict
                break
            if part not in current_map:
                current_map[part] = {}
            current_map = current_map[part]

    results['tests'] = tests
    # FIXME: Remove this. It is redundant with results['num_failures_by_type'].
    results['num_passes'] = num_passes
    results['num_flaky'] = num_flaky
    # FIXME: Remove this. It is redundant with results['num_failures_by_type'].
    results['num_regressions'] = num_regressions
    results['interrupted'] = initial_results.interrupted  # Does results.html have enough information to compute this itself? (by checking total number of results vs. total number of tests?)
    results['layout_tests_dir'] = port_obj.layout_tests_dir()
    results['has_wdiff'] = port_obj.wdiff_available()
    results['has_pretty_patch'] = port_obj.pretty_patch_available()
    results['pixel_tests_enabled'] = port_obj.get_option('pixel_tests')
    results['seconds_since_epoch'] = int(time.time())
    results['build_number'] = port_obj.get_option('build_number')
    results['builder_name'] = port_obj.get_option('builder_name')

    try:
        # Don't do this by default since it takes >100ms.
        # It's only used for uploading data to the flakiness dashboard.
        if port_obj.get_option("builder_name"):
            port_obj.host.initialize_scm()
            for (name, path) in port_obj.repository_paths():
                results[name.lower() + '_revision'] = port_obj.host.scm().svn_revision(path)
    except Exception as e:
        _log.warn("Failed to determine svn revision for checkout (cwd: %s, webkit_base: %s), leaving 'revision' key blank in full_results.json.\n%s" % (port_obj._filesystem.getcwd(), port_obj.path_from_webkit_base(), e))
        # Handle cases where we're running outside of version control.
        import traceback
        _log.debug('Failed to learn head svn revision:')
        _log.debug(traceback.format_exc())
        results['chromium_revision'] = ""
        results['blink_revision'] = ""

    return results