# Copyright (c) 2009, Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# WebKit's Python module for interacting with WebKit's buildbot

try:
    import json
except ImportError:
    # python 2.5 compatibility
    import webkitpy.thirdparty.simplejson as json

import operator
import re
import urllib
import urllib2

from webkitpy.common.net.failuremap import FailureMap
from webkitpy.common.net.layouttestresults import LayoutTestResults
from webkitpy.common.net.regressionwindow import RegressionWindow
from webkitpy.common.net.testoutputset import TestOutputSet
from webkitpy.common.system.logutils import get_logger
from webkitpy.common.system.zipfileset import ZipFileSet
from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
from webkitpy.thirdparty.autoinstalled.mechanize import Browser

_log = get_logger(__file__)


class Builder(object):
    def __init__(self, name, buildbot):
        self._name = name
        self._buildbot = buildbot
        self._builds_cache = {}
        self._revision_to_build_number = None
        self._browser = Browser()
        self._browser.set_handle_robots(False)  # The builder pages are excluded by robots.txt.

    def name(self):
        return self._name

    def results_url(self):
        return "http://%s/results/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())

    def url_encoded_name(self):
        return urllib.quote(self._name)

    def url(self):
        return "http://%s/builders/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())
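
    # For illustration (the builder name is hypothetical), a Builder named
    # "SnowLeopard Intel Release (Tests)" on the default master yields:
    #   url()         -> "http://build.webkit.org/builders/SnowLeopard%20Intel%20Release%20%28Tests%29"
    #   results_url() -> "http://build.webkit.org/results/SnowLeopard%20Intel%20Release%20%28Tests%29"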

    # This provides a single place to mock.
    def _fetch_build(self, build_number):
        build_dictionary = self._buildbot._fetch_build_dictionary(self, build_number)
        if not build_dictionary:
            return None
        return Build(self,
            build_number=int(build_dictionary['number']),
            revision=int(build_dictionary['sourceStamp']['revision']),
            is_green=(build_dictionary['results'] == 0)  # Undocumented; 0 seems to mean "pass".
        )

    def build(self, build_number):
        if not build_number:
            return None
        cached_build = self._builds_cache.get(build_number)
        if cached_build:
            return cached_build

        build = self._fetch_build(build_number)
        self._builds_cache[build_number] = build
        return build

    def latest_cached_build(self):
        revision_build_pairs = self.revision_build_pairs_with_results()
        revision_build_pairs.sort(key=lambda i: i[1])
        latest_build_number = revision_build_pairs[-1][1]
        return self.build(latest_build_number)

    def force_build(self, username="webkit-patch", comments=None):
        def predicate(form):
            try:
                return form.find_control("username")
            except Exception:
                return False
        self._browser.open(self.url())
        self._browser.select_form(predicate=predicate)
        self._browser["username"] = username
        if comments:
            self._browser["comments"] = comments
        return self._browser.submit()

    file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")
    def _revision_and_build_for_filename(self, filename):
        # Example: "r47483 (1)/" or "r47483 (1).zip" parses to (47483, 1).
        match = self.file_name_regexp.match(filename)
        return (int(match.group("revision")), int(match.group("build_number")))

    def _fetch_revision_to_build_map(self):
        # All _fetch requests go through _buildbot for easier mocking.
        # FIXME: This should use NetworkTransaction's 404 handling instead.
        try:
            # FIXME: This method is horribly slow due to the huge network load.
            # FIXME: This is a poor way to do revision -> build mapping.
            # Better would be to ask buildbot through some sort of API.
            print "Loading revision/build list from %s." % self.results_url()
            print "This may take a while..."
            result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise
            result_files = []

        # This assumes there was only one build per revision, which is false, but we don't care for now.
        return dict([self._revision_and_build_for_filename(file_info["filename"]) for file_info in result_files])

    def _revision_to_build_map(self):
        if not self._revision_to_build_number:
            self._revision_to_build_number = self._fetch_revision_to_build_map()
        return self._revision_to_build_number

    def revision_build_pairs_with_results(self):
        return self._revision_to_build_map().items()

    # This assumes there can be only one build per revision, which is false, but we don't care for now.
    def build_for_revision(self, revision, allow_failed_lookups=False):
        # NOTE: This lookup will fail if that exact revision was never built.
        build_number = self._revision_to_build_map().get(int(revision))
        if not build_number:
            return None
        build = self.build(build_number)
        if not build and allow_failed_lookups:
            # Builds for old revisions will fail to look up via buildbot's JSON API.
            build = Build(self,
                build_number=build_number,
                revision=revision,
                is_green=False,
            )
        return build

    def find_regression_window(self, red_build, look_back_limit=30):
        if not red_build or red_build.is_green():
            return RegressionWindow(None, None)
        common_failures = None
        current_build = red_build
        build_after_current_build = None
        look_back_count = 0
        while current_build:
            if current_build.is_green():
                # current_build can't possibly have any failures in common
                # with red_build because it's green.
                break
            results = current_build.layout_test_results()
            # We treat a lack of results as if all the tests failed.
            # This occurs, for example, when we can't compile at all.
            if results:
                failures = set(results.failing_tests())
                if common_failures is None:
                    common_failures = failures
                else:
                    common_failures = common_failures.intersection(failures)
                    if not common_failures:
                        # current_build doesn't have any failures in common with
                        # the red build we're worried about. We assume that any
                        # failures in current_build were due to flakiness.
                        break
            look_back_count += 1
            if look_back_count > look_back_limit:
                return RegressionWindow(None, current_build, failing_tests=common_failures)
            build_after_current_build = current_build
            current_build = current_build.previous_build()
        # We must iterate at least once because red_build is red.
        assert(build_after_current_build)
        # Current build must either be green or have no failures in common
        # with red build, so we've found our failure transition.
        return RegressionWindow(current_build, build_after_current_build, failing_tests=common_failures)

    def find_blameworthy_regression_window(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
        red_build = self.build(red_build_number)
        regression_window = self.find_regression_window(red_build, look_back_limit)
        if not regression_window.build_before_failure():
            return None  # We ran off the limit of our search.
        # If avoid_flakey_tests, require at least 2 bad builds before we
        # suspect a real failure transition.
        if avoid_flakey_tests and regression_window.failing_build() == red_build:
            return None
        return regression_window
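

# A minimal usage sketch for the regression hunting above (the builder name
# and build number are hypothetical; this queries the live master, so it is
# slow and network-bound):
#
#   buildbot = BuildBot()
#   builder = buildbot.builder_with_name("SnowLeopard Intel Release (Tests)")
#   window = builder.find_blameworthy_regression_window(1500)
#   if window:
#       print "Broke after r%s" % window.build_before_failure().revision()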


class Build(object):
    def __init__(self, builder, build_number, revision, is_green):
        self._builder = builder
        self._number = build_number
        self._revision = revision
        self._is_green = is_green
        self._layout_test_results = None

    @staticmethod
    def build_url(builder, build_number):
        return "%s/builds/%s" % (builder.url(), build_number)

    def url(self):
        return self.build_url(self.builder(), self._number)

    def results_url(self):
        results_directory = "r%s (%s)" % (self.revision(), self._number)
        return "%s/%s" % (self._builder.results_url(), urllib.quote(results_directory))

    def results_zip_url(self):
        return "%s.zip" % self.results_url()

    def results(self):
        return TestOutputSet(self._builder.name(), None, ZipFileSet(self.results_zip_url()), include_expected=False)

    def _fetch_results_html(self):
        results_html = "%s/results.html" % (self.results_url())
        # FIXME: This should use NetworkTransaction's 404 handling instead.
        try:
            # It seems this can return None if the url redirects and then returns 404.
            return urllib2.urlopen(results_html)
        except urllib2.HTTPError, error:
            if error.code != 404:
                raise

    def layout_test_results(self):
        if not self._layout_test_results:
            # FIXME: This should cache that the result was a 404 and stop hitting the network.
            self._layout_test_results = LayoutTestResults.results_from_string(self._fetch_results_html())
        return self._layout_test_results

    def builder(self):
        return self._builder

    def revision(self):
        return self._revision

    def is_green(self):
        return self._is_green

    def previous_build(self):
        # previous_build() allows callers to avoid assuming build numbers are sequential.
        # They may not be sequential across master changes, or when non-trunk builds are made.
        return self._builder.build(self._number - 1)
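

# A hedged sketch of walking back through build history with previous_build()
# (network-bound; the starting build number is hypothetical):
#
#   build = builder.build(1500)
#   while build and not build.is_green():
#       build = build.previous_build()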


class BuildBot(object):
    # FIXME: This should move into common.config.urls.
    default_host = "build.webkit.org"

    def __init__(self, host=default_host):
        self.buildbot_host = host
        self._builder_by_name = {}

        # If any core builder is red we should not be landing patches. Other
        # builders should be added to this list once they are known to be
        # reliable.
        # See https://bugs.webkit.org/show_bug.cgi?id=33296 and related bugs.
        self.core_builder_names_regexps = [
            r"SnowLeopard.*Build",
            r"SnowLeopard.*\(Test",
            r"SnowLeopard.*\(WebKit2 Test",
            r"Leopard.*",
            r"Windows.*Build",
            r"Windows.*\(Test",
            r"WinCairo",
            r"WinCE",
            r"EFL",
            r"GTK.*32",
            r"GTK.*64.*Debug",  # Disallow the 64-bit Release bot, which is broken.
            r"Qt",
            r"Chromium.*Release$",
        ]

    def _parse_last_build_cell(self, builder, cell):
        status_link = cell.find('a')
        if status_link:
            # Will be either a revision number or a build number.
            revision_string = status_link.string
            # If revision_string has non-digits, assume it's not a revision number.
            builder['built_revision'] = int(revision_string) \
                if not re.search(r'\D', revision_string) \
                else None

            # FIXME: We treat slave lost as green even though it is not, to
            # work around the Qt bot being on a broken internet connection.
            # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
            builder['is_green'] = not re.search('fail', cell.renderContents()) or \
                bool(re.search('lost', cell.renderContents()))

            status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
            link_match = re.match(status_link_regexp, status_link['href'])
            builder['build_number'] = int(link_match.group("build_number"))
        else:
            # We failed to find a link in the first cell; just give up. This
            # can happen when a builder was just added and its first cell
            # reads "no build".
            # Other parts of the code depend on is_green being present.
            builder['is_green'] = False
            builder['built_revision'] = None
            builder['build_number'] = None

    def _parse_current_build_cell(self, builder, cell):
        activity_lines = cell.renderContents().split("<br />")
        builder["activity"] = activity_lines[0]  # Normally "building" or "idle".
        # The middle lines document how long is left for any current builds.
        match = re.match(r"(?P<pending_builds>\d) pending", activity_lines[-1])
        builder["pending_builds"] = int(match.group("pending_builds")) if match else 0

    def _parse_builder_status_from_row(self, status_row):
        status_cells = status_row.findAll('td')
        builder = {}

        # First cell is the name.
        name_link = status_cells[0].find('a')
        builder["name"] = unicode(name_link.string)

        self._parse_last_build_cell(builder, status_cells[1])
        self._parse_current_build_cell(builder, status_cells[2])
        return builder
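
    # The status dictionaries built above look like this (values are illustrative):
    #   {
    #       "name": u"SnowLeopard Intel Release (Tests)",
    #       "built_revision": 47483,
    #       "is_green": True,
    #       "build_number": 1500,
    #       "activity": "idle",
    #       "pending_builds": 0,
    #   }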

    def _matches_regexps(self, builder_name, name_regexps):
        for name_regexp in name_regexps:
            if re.match(name_regexp, builder_name):
                return True
        return False

    # FIXME: Should move onto Builder.
    def _is_core_builder(self, builder_name):
        return self._matches_regexps(builder_name, self.core_builder_names_regexps)

    # FIXME: This method needs to die, but is used by a unit test at the moment.
    def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
        return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]

    def red_core_builders(self):
        return [builder for builder in self.core_builder_statuses() if not builder["is_green"]]

    def red_core_builders_names(self):
        return [builder["name"] for builder in self.red_core_builders()]

    def idle_red_core_builders(self):
        return [builder for builder in self.red_core_builders() if builder["activity"] == "idle"]

    def core_builders_are_green(self):
        return not self.red_core_builders()

    # FIXME: These _fetch methods should move to a networking class.
    def _fetch_build_dictionary(self, builder, build_number):
        try:
            base = "http://%s" % self.buildbot_host
            path = urllib.quote("json/builders/%s/builds/%s" % (builder.name(),
                                                                build_number))
            url = "%s/%s" % (base, path)
            jsondata = urllib2.urlopen(url)
            return json.load(jsondata)
        except urllib2.URLError, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error fetching data for %s build %s (%s): %s" % (builder.name(), build_number, build_url, err))
            return None
        except ValueError, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error decoding json data from %s: %s" % (build_url, err))
            return None

    def _fetch_one_box_per_builder(self):
        build_status_url = "http://%s/one_box_per_builder" % self.buildbot_host
        return urllib2.urlopen(build_status_url)

    def _file_cell_text(self, file_cell):
        """Traverses down through firstChild elements until one containing a string is found, then returns that string."""
        element = file_cell
        while element.string is None and element.contents:
            element = element.contents[0]
        return element.string

    def _parse_twisted_file_row(self, file_row):
        string_or_empty = lambda string: unicode(string) if string else u""
        file_cells = file_row.findAll('td')
        return {
            "filename": string_or_empty(self._file_cell_text(file_cells[0])),
            "size": string_or_empty(self._file_cell_text(file_cells[1])),
            "type": string_or_empty(self._file_cell_text(file_cells[2])),
            "encoding": string_or_empty(self._file_cell_text(file_cells[3])),
        }

    def _parse_twisted_directory_listing(self, page):
        soup = BeautifulSoup(page)
        # HACK: Match only table rows with a class to ignore twisted header/footer rows.
        file_rows = soup.find('table').findAll('tr', {'class': re.compile(r'\b(?:directory|file)\b')})
        return [self._parse_twisted_file_row(file_row) for file_row in file_rows]

    # FIXME: There should be a better way to get this information directly from twisted.
    def _fetch_twisted_directory_listing(self, url):
        return self._parse_twisted_directory_listing(urllib2.urlopen(url))
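
    # Each parsed directory-listing row is a dict of unicode strings; a
    # hypothetical example (the size and type values are illustrative):
    #   {"filename": u"r47483 (1).zip", "size": u"10M",
    #    "type": u"[application/zip]", "encoding": u""}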

    def builders(self):
        return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]

    # This method pulls from /one_box_per_builder as an efficient way to
    # get information about every builder in a single request.
    def builder_statuses(self):
        soup = BeautifulSoup(self._fetch_one_box_per_builder())
        return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]

    def core_builder_statuses(self):
        return [builder for builder in self.builder_statuses() if self._is_core_builder(builder["name"])]

    def builder_with_name(self, name):
        builder = self._builder_by_name.get(name)
        if not builder:
            builder = Builder(name, self)
            self._builder_by_name[name] = builder
        return builder

    def failure_map(self, only_core_builders=True):
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        failure_map = FailureMap()
        for builder_status in builder_statuses:
            if builder_status["is_green"]:
                continue
            builder = self.builder_with_name(builder_status["name"])
            regression_window = builder.find_blameworthy_regression_window(builder_status["build_number"])
            if regression_window:
                failure_map.add_regression_window(builder, regression_window)
        return failure_map

    # This makes fewer requests than calling Builder.latest_cached_build would. It grabs all builder
    # statuses in one request using self.builder_statuses (fetching /one_box_per_builder instead of builder pages).
    def _latest_builds_from_builders(self, only_core_builders=True):
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]

    def _build_at_or_before_revision(self, build, revision):
        while build:
            if build.revision() <= revision:
                return build
            build = build.previous_build()
        return None

    def last_green_revision(self, only_core_builders=True):
        builds = self._latest_builds_from_builders(only_core_builders)
        target_revision = builds[0].revision()
        # An alternate way to do this would be to start at one revision and walk backwards
        # checking builder.build_for_revision; however, build_for_revision is very slow on first load.
        while True:
            # Make builds agree on revision.
            builds = [self._build_at_or_before_revision(build, target_revision) for build in builds]
            if None in builds:  # One of the builds failed to load from the server.
                return None
            min_revision = min(map(lambda build: build.revision(), builds))
            if min_revision != target_revision:
                target_revision = min_revision
                continue  # Builds don't all agree on revision; keep searching.
            # Check to make sure they're all green.
            all_are_green = reduce(operator.and_, map(lambda build: build.is_green(), builds))
            if not all_are_green:
                target_revision -= 1
                continue
            return min_revision
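

# A minimal smoke-test sketch (not part of the module's API): it queries the
# live master, so it needs network access and may take a while to run.
if __name__ == "__main__":
    buildbot = BuildBot()
    for name in buildbot.red_core_builders_names():
        print "RED: %s" % name
    print "Last green revision: %s" % buildbot.last_green_revision()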