# Copyright (C) 2011, Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import copy
import logging

from webkitpy.common.memoized import memoized

_log = logging.getLogger(__name__)


# FIXME: Should this function be somewhere more general?
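# A minimal illustration of the inversion (hypothetical port names and
# truncated SHA-1 digests, not taken from a real checkout):
#   _invert_dictionary({'mac': 'aaa111', 'win': 'aaa111', 'linux': 'bbb222'})
#   returns {'aaa111': ['mac', 'win'], 'bbb222': ['linux']}
# The order of names within each list follows dict iteration order.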
def _invert_dictionary(dictionary):
    inverted_dictionary = {}
    for key, value in dictionary.items():
        if inverted_dictionary.get(value):
            inverted_dictionary[value].append(key)
        else:
            inverted_dictionary[value] = [key]
    return inverted_dictionary


class BaselineOptimizer(object):
    ROOT_LAYOUT_TESTS_DIRECTORY = 'LayoutTests'

    def __init__(self, host, port_names, skip_scm_commands):
        self._filesystem = host.filesystem
        self._port_factory = host.port_factory
        self._skip_scm_commands = skip_scm_commands
        self._files_to_delete = []
        self._files_to_add = []
        self._scm = host.scm()
        self._port_names = port_names
        # Only used by unittests.
        self.new_results_by_directory = []

    def _baseline_root(self, port, baseline_name):
        virtual_suite = port.lookup_virtual_suite(baseline_name)
        if virtual_suite:
            return self._filesystem.join(self.ROOT_LAYOUT_TESTS_DIRECTORY, virtual_suite.name)
        return self.ROOT_LAYOUT_TESTS_DIRECTORY

    def _baseline_search_path(self, port, baseline_name):
        virtual_suite = port.lookup_virtual_suite(baseline_name)
        if virtual_suite:
            return port.virtual_baseline_search_path(baseline_name)
        return port.baseline_search_path()

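    # Illustrative return value for a hypothetical mac port and a non-virtual
    # baseline (the exact directories depend on the port's fallback order):
    #   ['LayoutTests/platform/mac', 'LayoutTests']
    # For a virtual baseline the appended root is the virtual root instead,
    # e.g. 'LayoutTests/virtual/softwarecompositing'.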
    @memoized
    def _relative_baseline_search_paths(self, port_name, baseline_name):
        port = self._port_factory.get(port_name)
        relative_paths = [self._filesystem.relpath(path, port.webkit_base()) for path in self._baseline_search_path(port, baseline_name)]
        return relative_paths + [self._baseline_root(port, baseline_name)]

    def _join_directory(self, directory, baseline_name):
        # This code is complicated because both the directory name and the baseline_name contain the virtual
        # test suite name, and the virtual baseline name is not a strict superset of the non-virtual name.
        # For example, virtual/softwarecompositing/foo-expected.png corresponds to compositing/foo-expected.png,
        # and the baseline directories look like platform/mac/virtual/softwarecompositing. So, to get the path
        # to the baseline in the platform directory, we need to append just foo-expected.png to the directory.
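        # A sketch of the resulting join for the example above (the checkout
        # root path is hypothetical):
        #   directory     = 'LayoutTests/platform/mac/virtual/softwarecompositing'
        #   baseline_name = 'virtual/softwarecompositing/foo-expected.png'
        #   returns <checkout_root>/LayoutTests/platform/mac/virtual/softwarecompositing/foo-expected.png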
        virtual_suite = self._port_factory.get().lookup_virtual_suite(baseline_name)
        if virtual_suite:
            baseline_name_without_virtual = baseline_name[len(virtual_suite.name) + 1:]
        else:
            baseline_name_without_virtual = baseline_name
        return self._filesystem.join(self._scm.checkout_root, directory, baseline_name_without_virtual)

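    # Maps every fallback directory that actually contains this baseline to the
    # SHA-1 of that file's contents. Illustrative shape (hypothetical digests):
    #   {'LayoutTests/platform/mac': 'aaa111...', 'LayoutTests': 'bbb222...'}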
    def read_results_by_directory(self, baseline_name):
        results_by_directory = {}
        directories = reduce(set.union, map(set, [self._relative_baseline_search_paths(port_name, baseline_name) for port_name in self._port_names]))

        for directory in directories:
            path = self._join_directory(directory, baseline_name)
            if self._filesystem.exists(path):
                results_by_directory[directory] = self._filesystem.sha1(path)
        return results_by_directory

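    # Collapses the per-directory results into one result per port: each port is
    # assigned the first baseline found along its fallback path. Illustrative
    # shape (hypothetical names and digests): {'mac': 'aaa111...', 'win': 'bbb222...'}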
    def _results_by_port_name(self, results_by_directory, baseline_name):
        results_by_port_name = {}
        for port_name in self._port_names:
            for directory in self._relative_baseline_search_paths(port_name, baseline_name):
                if directory in results_by_directory:
                    results_by_port_name[port_name] = results_by_directory[directory]
                    break
        return results_by_port_name

    @memoized
    def _directories_immediately_preceding_root(self, baseline_name):
        directories = set()
        for port_name in self._port_names:
            port = self._port_factory.get(port_name)
            directory = self._filesystem.relpath(self._baseline_search_path(port, baseline_name)[-1], port.webkit_base())
            directories.add(directory)
        return directories

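    # Illustrative promotion (hypothetical directories and digests): if the only
    # directories immediately preceding the root are
    #   {'LayoutTests/platform/win': 'aaa111', 'LayoutTests/platform/mac': 'aaa111'}
    # then both entries are removed and {'LayoutTests': 'aaa111'} takes their place;
    # if their digests differ, the root baseline is dropped instead.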
    def _optimize_result_for_root(self, new_results_by_directory, baseline_name):
        # The root directory (i.e. LayoutTests) is the only one that doesn't correspond
        # to a specific platform. As such, it's the only one where baselines in the fallback
        # directories immediately before it can be promoted up: if, say, win and mac
        # have the same baseline, then it can be promoted up to be the LayoutTests baseline.
        # All other baselines can only be removed if they're redundant with a baseline earlier
        # in the fallback order. They can never be promoted up.
        directories_immediately_preceding_root = self._directories_immediately_preceding_root(baseline_name)

        shared_result = None
        root_baseline_unused = False
        for directory in directories_immediately_preceding_root:
            this_result = new_results_by_directory.get(directory)

            # If any of these directories doesn't have a baseline, there's no optimization we can do.
            if not this_result:
                return

            if not shared_result:
                shared_result = this_result
            elif shared_result != this_result:
                root_baseline_unused = True

        baseline_root = self._baseline_root(self._port_factory.get(), baseline_name)

        # The root baseline is unused if all the directories immediately preceding the root
        # have a baseline but their baselines differ, so nothing can be promoted up.
        if root_baseline_unused:
            if baseline_root in new_results_by_directory:
                del new_results_by_directory[baseline_root]
            return

        new_results_by_directory[baseline_root] = shared_result
        for directory in directories_immediately_preceding_root:
            del new_results_by_directory[directory]

    def _find_optimal_result_placement(self, baseline_name):
        results_by_directory = self.read_results_by_directory(baseline_name)
        results_by_port_name = self._results_by_port_name(results_by_directory, baseline_name)
        port_names_by_result = _invert_dictionary(results_by_port_name)

        new_results_by_directory = self._remove_redundant_results(results_by_directory, results_by_port_name, port_names_by_result, baseline_name)
        self._optimize_result_for_root(new_results_by_directory, baseline_name)

        return results_by_directory, new_results_by_directory

    def _remove_redundant_results(self, results_by_directory, results_by_port_name, port_names_by_result, baseline_name):
        new_results_by_directory = copy.copy(results_by_directory)
        for port_name in self._port_names:
            current_result = results_by_port_name.get(port_name)

            # This happens if we're missing baselines for a port.
            if not current_result:
                continue

            fallback_path = self._relative_baseline_search_paths(port_name, baseline_name)
            current_index, current_directory = self._find_in_fallbackpath(fallback_path, current_result, new_results_by_directory)
            for index in range(current_index + 1, len(fallback_path)):
                new_directory = fallback_path[index]
                if new_directory not in new_results_by_directory:
                    # No result for this baseline in this directory.
                    continue
                elif new_results_by_directory[new_directory] == current_result:
                    # The result in new_directory duplicates the one earlier in the fallback order,
                    # so the earlier copy is redundant.
                    if current_directory in new_results_by_directory:
                        del new_results_by_directory[current_directory]
                else:
                    # The new_directory contains a different result, so stop trying to push results up.
                    break

        return new_results_by_directory

    def _find_in_fallbackpath(self, fallback_path, current_result, results_by_directory):
        for index, directory in enumerate(fallback_path):
            if directory in results_by_directory and (results_by_directory[directory] == current_result):
                return index, directory
        assert False, "result %s not found in fallback_path %s, %s" % (current_result, fallback_path, results_by_directory)

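    # Illustrative behaviour (paths are hypothetical):
    #   _platform('LayoutTests/platform/mac/foo-expected.png')  -> 'mac'
    #   _platform('LayoutTests/foo-expected.png')               -> '(generic)'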
    def _platform(self, filename):
        platform_dir = self.ROOT_LAYOUT_TESTS_DIRECTORY + self._filesystem.sep + 'platform' + self._filesystem.sep
        if filename.startswith(platform_dir):
            return filename.replace(platform_dir, '').split(self._filesystem.sep)[0]
        platform_dir = self._filesystem.join(self._scm.checkout_root, platform_dir)
        if filename.startswith(platform_dir):
            return filename.replace(platform_dir, '').split(self._filesystem.sep)[0]
        return '(generic)'

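    # Deletes the baseline files whose directory no longer carries a result in
    # new_results_by_directory and writes the baseline data into any directory that
    # gained one. When skip_scm_commands is set, the SCM deletes/adds are only
    # recorded in self._files_to_delete / self._files_to_add rather than executed.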
    def _move_baselines(self, baseline_name, results_by_directory, new_results_by_directory):
        data_for_result = {}
        for directory, result in results_by_directory.items():
            if result not in data_for_result:
                source = self._join_directory(directory, baseline_name)
                data_for_result[result] = self._filesystem.read_binary_file(source)

        scm_files = []
        fs_files = []
        for directory, result in results_by_directory.items():
            if new_results_by_directory.get(directory) != result:
                file_name = self._join_directory(directory, baseline_name)
                if self._scm.exists(file_name):
                    scm_files.append(file_name)
                else:
                    fs_files.append(file_name)

        if scm_files or fs_files:
            if scm_files:
                _log.debug("    Deleting (SCM):")
                for platform_dir in sorted(self._platform(filename) for filename in scm_files):
                    _log.debug("      " + platform_dir)
                if self._skip_scm_commands:
                    self._files_to_delete.extend(scm_files)
                else:
                    self._scm.delete_list(scm_files)
            if fs_files:
                _log.debug("    Deleting (file system):")
                for platform_dir in sorted(self._platform(filename) for filename in fs_files):
                    _log.debug("      " + platform_dir)
                for filename in fs_files:
                    self._filesystem.remove(filename)
        else:
            _log.debug("    (Nothing to delete)")

        file_names = []
        for directory, result in new_results_by_directory.items():
            if results_by_directory.get(directory) != result:
                destination = self._join_directory(directory, baseline_name)
                self._filesystem.maybe_make_directory(self._filesystem.split(destination)[0])
                self._filesystem.write_binary_file(destination, data_for_result[result])
                file_names.append(destination)

        if file_names:
            _log.debug("    Adding:")
            for platform_dir in sorted(self._platform(filename) for filename in file_names):
                _log.debug("      " + platform_dir)
            if self._skip_scm_commands:
                # Have adds win over deletes.
                self._files_to_delete = list(set(self._files_to_delete) - set(file_names))
                self._files_to_add.extend(file_names)
            else:
                self._scm.add_list(file_names)
        else:
            _log.debug("    (Nothing to add)")

    def write_by_directory(self, results_by_directory, writer, indent):
        for path in sorted(results_by_directory):
            writer("%s%s: %s" % (indent, self._platform(path), results_by_directory[path][0:6]))

    def _optimize_subtree(self, baseline_name):
        basename = self._filesystem.basename(baseline_name)
        results_by_directory, new_results_by_directory = self._find_optimal_result_placement(baseline_name)

        if new_results_by_directory == results_by_directory:
            if new_results_by_directory:
                _log.debug("  %s: (already optimal)" % basename)
                self.write_by_directory(results_by_directory, _log.debug, "    ")
            else:
                _log.debug("  %s: (no baselines found)" % basename)
            # This is just used for unittests. Intentionally set it to the old data if we don't modify anything.
            self.new_results_by_directory.append(results_by_directory)
            return True

        if self._results_by_port_name(results_by_directory, baseline_name) != self._results_by_port_name(new_results_by_directory, baseline_name):
            # This really should never happen. Just a sanity check to make sure the script fails in the case of bugs
            # instead of committing incorrect baselines.
            _log.error("  %s: optimization failed" % basename)
            self.write_by_directory(results_by_directory, _log.warning, "      ")
            return False

        _log.debug("  %s:" % basename)
        _log.debug("    Before: ")
        self.write_by_directory(results_by_directory, _log.debug, "      ")
        _log.debug("    After: ")
        self.write_by_directory(new_results_by_directory, _log.debug, "      ")

        self._move_baselines(baseline_name, results_by_directory, new_results_by_directory)
        return True

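    # If the baseline at the virtual root (e.g. LayoutTests/virtual/softwarecompositing/foo-expected.png,
    # a hypothetical path) only repeats the result every port would already pick up
    # from the non-virtual fallback path, it is redundant and gets deleted
    # (or queued for deletion when skip_scm_commands is set).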
    def _optimize_virtual_root(self, baseline_name, non_virtual_baseline_name):
        default_port = self._port_factory.get()
        virtual_root_expected_baseline_path = self._filesystem.join(default_port.layout_tests_dir(), baseline_name)
        if not self._filesystem.exists(virtual_root_expected_baseline_path):
            return
        root_sha1 = self._filesystem.sha1(virtual_root_expected_baseline_path)

        results_by_directory = self.read_results_by_directory(non_virtual_baseline_name)
        # See if all the immediate predecessors of the virtual root have the same expected result.
        for port_name in self._port_names:
            directories = self._relative_baseline_search_paths(port_name, non_virtual_baseline_name)
            for directory in directories:
                if directory not in results_by_directory:
                    continue
                if results_by_directory[directory] != root_sha1:
                    return
                break

        _log.debug("Deleting redundant virtual root expected result.")
        if self._skip_scm_commands:
            self._files_to_delete.append(virtual_root_expected_baseline_path)
        else:
            self._scm.delete(virtual_root_expected_baseline_path)

    def optimize(self, baseline_name):
        # The fallback path for a virtual test is its virtual fallback path with the
        # non-virtual fallback path tacked on to the bottom.
        # See https://docs.google.com/a/chromium.org/drawings/d/1eGdsIKzJ2dxDDBbUaIABrN4aMLD1bqJTfyxNGZsTdmg/edit for
        # a visual representation of this.
        #
        # So we can optimize the virtual path, then the virtual root, and then the regular path.

        _log.debug("Optimizing regular fallback path.")
        result = self._optimize_subtree(baseline_name)
        non_virtual_baseline_name = self._port_factory.get().lookup_virtual_test_base(baseline_name)
        if not non_virtual_baseline_name:
            return result, self._files_to_delete, self._files_to_add

        self._optimize_virtual_root(baseline_name, non_virtual_baseline_name)

        _log.debug("Optimizing non-virtual fallback path.")
        result |= self._optimize_subtree(non_virtual_baseline_name)
        return result, self._files_to_delete, self._files_to_add
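
# A minimal usage sketch, assuming `host` is a webkitpy Host-like object
# (providing .filesystem, .port_factory and .scm()) and `port_names` is a list
# of port names to consider; both names are placeholders supplied by the caller:
#
#   optimizer = BaselineOptimizer(host, port_names, skip_scm_commands=True)
#   succeeded, files_to_delete, files_to_add = optimizer.optimize(
#       'virtual/softwarecompositing/foo-expected.png')
#
# With skip_scm_commands=True the SCM deletes/adds are returned to the caller
# instead of being executed immediately.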