# Copyright (C) 2011, Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import copy
import logging

from webkitpy.common.memoized import memoized

_log = logging.getLogger(__name__)


# FIXME: Should this function be somewhere more general?
def _invert_dictionary(dictionary):
    inverted_dictionary = {}
    for key, value in dictionary.items():
        if inverted_dictionary.get(value):
            inverted_dictionary[value].append(key)
        else:
            inverted_dictionary[value] = [key]
    return inverted_dictionary


class BaselineOptimizer(object):
    ROOT_LAYOUT_TESTS_DIRECTORY = 'LayoutTests'

    def __init__(self, host, port_names, skip_scm_commands):
        self._filesystem = host.filesystem
        self._port_factory = host.port_factory
        self._skip_scm_commands = skip_scm_commands
        self._files_to_delete = []
        self._files_to_add = []
        self._scm = host.scm()
        self._port_names = port_names
        # Only used by unittests.
        self.new_results_by_directory = []

    def _baseline_root(self, port, baseline_name):
        virtual_suite = port.lookup_virtual_suite(baseline_name)
        if virtual_suite:
            return self._filesystem.join(self.ROOT_LAYOUT_TESTS_DIRECTORY, virtual_suite.name)
        return self.ROOT_LAYOUT_TESTS_DIRECTORY

    def _baseline_search_path(self, port, baseline_name):
        virtual_suite = port.lookup_virtual_suite(baseline_name)
        if virtual_suite:
            return port.virtual_baseline_search_path(baseline_name)
        return port.baseline_search_path()

    @memoized
    def _relative_baseline_search_paths(self, port_name, baseline_name):
        port = self._port_factory.get(port_name)
        relative_paths = [self._filesystem.relpath(path, port.webkit_base()) for path in self._baseline_search_path(port, baseline_name)]
        return relative_paths + [self._baseline_root(port, baseline_name)]

    def _join_directory(self, directory, baseline_name):
        # This code is complicated because both the directory name and the baseline_name have the virtual
        # test suite in the name, and the virtual baseline name is not a strict superset of the non-virtual name.
        # For example, virtual/softwarecompositing/foo-expected.png corresponds to compositing/foo-expected.png and
        # the baseline directories are like platform/mac/virtual/softwarecompositing. So, to get the path
        # to the baseline in the platform directory, we need to append just foo-expected.png to the directory.
        virtual_suite = self._port_factory.get().lookup_virtual_suite(baseline_name)
        if virtual_suite:
            baseline_name_without_virtual = baseline_name[len(virtual_suite.name) + 1:]
        else:
            baseline_name_without_virtual = baseline_name
        return self._filesystem.join(self._scm.checkout_root, directory, baseline_name_without_virtual)

    def read_results_by_directory(self, baseline_name):
        results_by_directory = {}
        directories = reduce(set.union, map(set, [self._relative_baseline_search_paths(port_name, baseline_name) for port_name in self._port_names]))

        for directory in directories:
            path = self._join_directory(directory, baseline_name)
            if self._filesystem.exists(path):
                results_by_directory[directory] = self._filesystem.sha1(path)
        return results_by_directory

    def _results_by_port_name(self, results_by_directory, baseline_name):
        results_by_port_name = {}
        for port_name in self._port_names:
            for directory in self._relative_baseline_search_paths(port_name, baseline_name):
                if directory in results_by_directory:
                    results_by_port_name[port_name] = results_by_directory[directory]
                    break
        return results_by_port_name

    @memoized
    def _directories_immediately_preceding_root(self, baseline_name):
        directories = set()
        for port_name in self._port_names:
            port = self._port_factory.get(port_name)
            directory = self._filesystem.relpath(self._baseline_search_path(port, baseline_name)[-1], port.webkit_base())
            directories.add(directory)
        return directories

    def _optimize_result_for_root(self, new_results_by_directory, baseline_name):
        # The root directory (i.e. LayoutTests) is the only one that doesn't correspond
        # to a specific platform. As such, it's the only one where the baselines in the fallback
        # directories immediately before it can be promoted up, i.e. if win and mac
        # have the same baseline, then it can be promoted up to be the LayoutTests baseline.
        # All other baselines can only be removed if they're redundant with a baseline earlier
        # in the fallback order.
        # They can never be promoted up.
        directories_immediately_preceding_root = self._directories_immediately_preceding_root(baseline_name)

        shared_result = None
        root_baseline_unused = False
        for directory in directories_immediately_preceding_root:
            this_result = new_results_by_directory.get(directory)

            # If any of these directories don't have a baseline, there's no optimization we can do.
            if not this_result:
                return

            if not shared_result:
                shared_result = this_result
            elif shared_result != this_result:
                root_baseline_unused = True

        baseline_root = self._baseline_root(self._port_factory.get(), baseline_name)

        # The root baseline is unused if all the directories immediately preceding the root
        # have a baseline, but they have different baselines, so the baselines can't be promoted up.
        if root_baseline_unused:
            if baseline_root in new_results_by_directory:
                del new_results_by_directory[baseline_root]
            return

        new_results_by_directory[baseline_root] = shared_result
        for directory in directories_immediately_preceding_root:
            del new_results_by_directory[directory]

    def _find_optimal_result_placement(self, baseline_name):
        results_by_directory = self.read_results_by_directory(baseline_name)
        results_by_port_name = self._results_by_port_name(results_by_directory, baseline_name)
        port_names_by_result = _invert_dictionary(results_by_port_name)

        new_results_by_directory = self._remove_redundant_results(results_by_directory, results_by_port_name, port_names_by_result, baseline_name)
        self._optimize_result_for_root(new_results_by_directory, baseline_name)

        return results_by_directory, new_results_by_directory

    def _remove_redundant_results(self, results_by_directory, results_by_port_name, port_names_by_result, baseline_name):
        new_results_by_directory = copy.copy(results_by_directory)
        for port_name in self._port_names:
            current_result = results_by_port_name.get(port_name)

            # This happens if we're missing baselines for a port.
            if not current_result:
                continue

            fallback_path = self._relative_baseline_search_paths(port_name, baseline_name)
            current_index, current_directory = self._find_in_fallbackpath(fallback_path, current_result, new_results_by_directory)
            for index in range(current_index + 1, len(fallback_path)):
                new_directory = fallback_path[index]
                if new_directory not in new_results_by_directory:
                    # No result for this baseline in this directory.
                    continue
                elif new_results_by_directory[new_directory] == current_result:
                    # The result for new_directory is redundant with the result earlier in the fallback order.
                    if current_directory in new_results_by_directory:
                        del new_results_by_directory[current_directory]
                else:
                    # The new_directory contains a different result, so stop trying to push results up.
                    break

        return new_results_by_directory

    def _find_in_fallbackpath(self, fallback_path, current_result, results_by_directory):
        for index, directory in enumerate(fallback_path):
            if directory in results_by_directory and (results_by_directory[directory] == current_result):
                return index, directory
        assert False, "result %s not found in fallback_path %s, %s" % (current_result, fallback_path, results_by_directory)

    def _platform(self, filename):
        platform_dir = self.ROOT_LAYOUT_TESTS_DIRECTORY + self._filesystem.sep + 'platform' + self._filesystem.sep
        if filename.startswith(platform_dir):
            return filename.replace(platform_dir, '').split(self._filesystem.sep)[0]
        platform_dir = self._filesystem.join(self._scm.checkout_root, platform_dir)
        if filename.startswith(platform_dir):
            return filename.replace(platform_dir, '').split(self._filesystem.sep)[0]
        return '(generic)'

    def _move_baselines(self, baseline_name, results_by_directory, new_results_by_directory):
        data_for_result = {}
        for directory, result in results_by_directory.items():
            if result not in data_for_result:
                source = self._join_directory(directory, baseline_name)
                data_for_result[result] = self._filesystem.read_binary_file(source)

        scm_files = []
        fs_files = []
        for directory, result in results_by_directory.items():
            if new_results_by_directory.get(directory) != result:
                file_name = self._join_directory(directory, baseline_name)
                if self._scm.exists(file_name):
                    scm_files.append(file_name)
                else:
                    fs_files.append(file_name)

        if scm_files or fs_files:
            if scm_files:
                _log.debug(" Deleting (SCM):")
                for platform_dir in sorted(self._platform(filename) for filename in scm_files):
                    _log.debug(" " + platform_dir)
                if self._skip_scm_commands:
                    self._files_to_delete.extend(scm_files)
                else:
                    self._scm.delete_list(scm_files)
            if fs_files:
                _log.debug(" Deleting (file system):")
                for platform_dir in sorted(self._platform(filename) for filename in fs_files):
                    _log.debug(" " + platform_dir)
                for filename in fs_files:
                    self._filesystem.remove(filename)
        else:
            _log.debug(" (Nothing to delete)")

        file_names = []
        for directory, result in new_results_by_directory.items():
            if results_by_directory.get(directory) != result:
                destination = self._join_directory(directory, baseline_name)
                self._filesystem.maybe_make_directory(self._filesystem.split(destination)[0])
                self._filesystem.write_binary_file(destination, data_for_result[result])
                file_names.append(destination)

        if file_names:
            _log.debug(" Adding:")
            for platform_dir in sorted(self._platform(filename) for filename in file_names):
                _log.debug(" " + platform_dir)
            if self._skip_scm_commands:
                # Have adds win over deletes.
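                # (Editor's note, inferred from the lists built above: a file whose contents change
                # in place, rather than move, is collected in both the delete list and in file_names
                # here; keeping only the add means the rewritten baseline is not also scheduled for
                # deletion when the caller later applies these lists.)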
                self._files_to_delete = list(set(self._files_to_delete) - set(file_names))
                self._files_to_add.extend(file_names)
            else:
                self._scm.add_list(file_names)
        else:
            _log.debug(" (Nothing to add)")

    def write_by_directory(self, results_by_directory, writer, indent):
        for path in sorted(results_by_directory):
            writer("%s%s: %s" % (indent, self._platform(path), results_by_directory[path][0:6]))

    def _optimize_subtree(self, baseline_name):
        basename = self._filesystem.basename(baseline_name)
        results_by_directory, new_results_by_directory = self._find_optimal_result_placement(baseline_name)

        if new_results_by_directory == results_by_directory:
            if new_results_by_directory:
                _log.debug(" %s: (already optimal)" % basename)
                self.write_by_directory(results_by_directory, _log.debug, " ")
            else:
                _log.debug(" %s: (no baselines found)" % basename)
            # This is just used for unittests. Intentionally set it to the old data if we don't modify anything.
            self.new_results_by_directory.append(results_by_directory)
            return True

        if self._results_by_port_name(results_by_directory, baseline_name) != self._results_by_port_name(new_results_by_directory, baseline_name):
            # This really should never happen. It's just a sanity check to make sure the script fails in the case of bugs
            # instead of committing incorrect baselines.
            _log.error(" %s: optimization failed" % basename)
            self.write_by_directory(results_by_directory, _log.warning, " ")
            return False

        _log.debug(" %s:" % basename)
        _log.debug(" Before: ")
        self.write_by_directory(results_by_directory, _log.debug, " ")
        _log.debug(" After: ")
        self.write_by_directory(new_results_by_directory, _log.debug, " ")

        self._move_baselines(baseline_name, results_by_directory, new_results_by_directory)
        return True

    def _optimize_virtual_root(self, baseline_name, non_virtual_baseline_name):
        default_port = self._port_factory.get()
        virtual_root_expected_baseline_path = self._filesystem.join(default_port.layout_tests_dir(), baseline_name)
        if not self._filesystem.exists(virtual_root_expected_baseline_path):
            return
        root_sha1 = self._filesystem.sha1(virtual_root_expected_baseline_path)

        results_by_directory = self.read_results_by_directory(non_virtual_baseline_name)
        # See if all the immediate predecessors of the virtual root have the same expected result.
        for port_name in self._port_names:
            directories = self._relative_baseline_search_paths(port_name, non_virtual_baseline_name)
            for directory in directories:
                if directory not in results_by_directory:
                    continue
                if results_by_directory[directory] != root_sha1:
                    return
                break

        _log.debug("Deleting redundant virtual root expected result.")
        if self._skip_scm_commands:
            self._files_to_delete.append(virtual_root_expected_baseline_path)
        else:
            self._scm.delete(virtual_root_expected_baseline_path)

    def optimize(self, baseline_name):
        # The virtual fallback path is the same as the non-virtual one tacked on to the bottom of the virtual path.
        # See https://docs.google.com/a/chromium.org/drawings/d/1eGdsIKzJ2dxDDBbUaIABrN4aMLD1bqJTfyxNGZsTdmg/edit for
        # a visual representation of this.
        #
        # So, we can optimize the virtual path, then the virtual root and then the regular path.
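        #
        # A rough illustration (editor's note; the directory names here are hypothetical examples,
        # not an exhaustive list): a virtual baseline such as virtual/softwarecompositing/foo-expected.png
        # is looked up in directories like platform/mac/virtual/softwarecompositing and
        # LayoutTests/virtual/softwarecompositing first, and then falls back to the locations of the
        # non-virtual baseline compositing/foo-expected.png (platform/mac, ..., LayoutTests).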

        _log.debug("Optimizing regular fallback path.")
        result = self._optimize_subtree(baseline_name)
        non_virtual_baseline_name = self._port_factory.get().lookup_virtual_test_base(baseline_name)
        if not non_virtual_baseline_name:
            return result, self._files_to_delete, self._files_to_add

        self._optimize_virtual_root(baseline_name, non_virtual_baseline_name)

        _log.debug("Optimizing non-virtual fallback path.")
        result |= self._optimize_subtree(non_virtual_baseline_name)
        return result, self._files_to_delete, self._files_to_add
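
# Usage sketch (editor's note, not part of the original module): one way a caller might drive the
# optimizer, assuming the usual webkitpy environment; the baseline name below is a hypothetical example.
#
#   from webkitpy.common.host import Host
#
#   host = Host()
#   optimizer = BaselineOptimizer(host, host.port_factory.all_port_names(), skip_scm_commands=True)
#   succeeded, files_to_delete, files_to_add = optimizer.optimize('compositing/foo-expected.txt')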