#!/usr/bin/env python3
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse, collections, os, re, sys

dir_of_this_script = os.path.dirname(os.path.realpath(__file__))

parser = argparse.ArgumentParser(
    description="""USAGE:
    Simplifies a build.log from hundreds of megabytes to <100 lines. Prints output to terminal.
    Pass this script a filepath to parse. You should be able to type "python3 build_log_simplifier.py"
    And then drag-and-drop a log file onto the terminal window to get its path.

    Sample usage: python3 development/build_log_simplifier.py Users/owengray/Desktop/build.log
    """)
parser.add_argument("--validate", action="store_true", help="Validate that no unrecognized messages exist in the given log")
parser.add_argument("--update", action="store_true", help="Update our list of recognized messages to include all messages from the given log")
parser.add_argument("--gc", action="store_true", help="When generating a new exemptions file, exclude any exemptions that were not found in the given log. Only relevant with --update or --validate")
parser.add_argument("log_path", help="Filepath of log(s) to process", nargs="+")


# a regexes_matcher can quickly identify which of a set of regexes matches a given text
class regexes_matcher(object):
    def __init__(self, regexes):
        """Builds a matcher over the given list of regex strings.

        Matching is done lazily: a single composite regex and a tree of child
        matchers are only compiled/created when first needed.
        """
        self.regex_texts = regexes
        self.children = None
        self.matcher = None

    def get_matching_regexes(self, text, expect_match=True):
        """Returns the list of regexes (full-match) matching <text>.

        <expect_match> is a hint: when True and we hold several regexes, we skip
        the composite pre-check and query children directly, because the caller
        already believes at least one regex will match.
        """
        if expect_match and len(self.regex_texts) > 1:
            # If we already expect our matcher to match, we can directly jump to asking our children
            return self.query_children_for_matching_regexes(text)
        # It takes more time to match lots of regexes than to match one composite regex
        # So, we try to match one composite regex first
        if self.matches(text):
            if len(self.regex_texts) > 1:
                # At least one child regex matches, so we have to determine which ones
                return self.query_children_for_matching_regexes(text)
            else:
                return self.regex_texts
        # Our composite regex yielded no matches
        return []

    def query_children_for_matching_regexes(self, text):
        """Queries our children for regexes that match <text> and joins their results."""
        # Create children if they don't yet exist
        self.ensure_split()
        # query children and join their results
        results = []
        for child in self.children:
            results += child.get_matching_regexes(text, False)
        return results

    def index_first_matching_regex(self, text):
        """Returns the index of the first regex fully matching <text>, or None if not found."""
        if len(self.regex_texts) <= 1:
            if len(self.regex_texts) == 0:
                return None
            if self.matches(text):
                return 0
            return None
        if not self.matches(text):
            return None
        self.ensure_split()
        count = 0
        for child in self.children:
            child_index = child.index_first_matching_regex(text)
            if child_index is not None:
                return count + child_index
            count += len(child.regex_texts)
        return None

    def ensure_split(self):
        """Creates child matchers (partitions of our regexes) if they don't yet exist."""
        if self.children is None:
            # It takes more time to compile a longer regex, but it also takes more time to
            # test lots of small regexes.
            # In practice, this number of children seems to result in fast execution
            num_children = min(len(self.regex_texts), 32)
            child_start = 0
            self.children = []
            for i in range(num_children):
                child_end = int(len(self.regex_texts) * (i + 1) / num_children)
                self.children.append(regexes_matcher(self.regex_texts[child_start:child_end]))
                child_start = child_end

    def matches(self, text):
        """Returns a truthy match object iff any of our regexes fully matches <text>.

        Compiles (and caches) one composite alternation of all our regexes.
        """
        if self.matcher is None:
            full_regex_text = "(?:" + ")|(?:".join(self.regex_texts) + ")"
            self.matcher = re.compile(full_regex_text)
        return self.matcher.fullmatch(text)


def print_failing_task_names(lines):
    """Scans <lines> for Gradle task-failure messages and prints the failing task names."""
    tasks_of_interest = []
    # first, find tasks of interest
    for line in lines:
        if line.startswith("Execution failed for task"):
            # extract the task name between "task '" and the trailing "'.\n"
            tasks_of_interest.append(line.split("task '")[1][:-3])

    print("Detected these failing tasks: " + str(tasks_of_interest))


def shorten_uninteresting_stack_frames(lines):
    """Collapses consecutive non-androidx stack frames into a single elided frame."""
    result = []
    prev_line_is_boring = False
    for line in lines:
        if line.startswith("\tat ") and not line.startswith("\tat androidx"):
            # non-androidx stack frame
            if not prev_line_is_boring:
                result.append(line.replace("\n", "...\n"))
                prev_line_is_boring = True
        else:
            result.append(line)
            prev_line_is_boring = False
    return result


# Returns the path of the config file holding exemptions for deterministic/consistent output.
# These exemptions can be garbage collected via the `--gc` argument
def get_deterministic_exemptions_path():
    return os.path.join(dir_of_this_script, "messages.ignore")


# Returns the path of the config file holding exemptions for nondetermistic/flaky output.
# These exemptions will not be garbage collected via the `--gc` argument
def get_flake_exemptions_path():
    return os.path.join(dir_of_this_script, "message-flakes.ignore")


# Returns a regexes_matcher that matches what is described by our config file
# Ignores comments and ordering in our config file
def build_exemptions_matcher(config_lines):
    config_lines = [line.replace("\n", "") for line in config_lines]
    regexes = []
    for line in config_lines:
        line = line.strip()
        if line.startswith("#") or line == "":
            # skip comments
            continue
        regexes.append(line)
        # a config line containing control characters (e.g. ANSI colors pasted from a
        # terminal) could never match our cleaned log lines, so reject it loudly
        if remove_control_characters(line) != line:
            raise Exception("Unexpected control characters found in configuration line:\n\n " +
                "'" + line + "'\n\n. This line is unexpected to match anything. Is this a copying mistake?")

    return regexes_matcher(sorted(regexes))


# Returns a regexes_matcher that matches the content of our config file
# Can match comments
# Respects ordering in the config
# This is used for editing the config file itself
def build_exemptions_code_matcher(config_lines):
    regexes = []
    for line in config_lines:
        line = line.strip()
        if line == "":
            continue
        regexes.append(line)
    return regexes_matcher(regexes)


def remove_by_regexes(lines, config_lines, validate_no_duplicates):
    """Returns the lines in <lines> matched by no exemption regex in <config_lines>.

    When <validate_no_duplicates> is set, exits with an error if any single line
    is matched by more than one exemption (overly-broad exemptions).
    """
    fast_matcher = build_exemptions_matcher(config_lines)
    result = []
    for line in lines:
        stripped = line.strip()
        matching_exemptions = fast_matcher.get_matching_regexes(stripped, expect_match=True)
        if validate_no_duplicates and len(matching_exemptions) > 1:
            print("")
            print("build_log_simplifier.py: Invalid configuration: multiple message exemptions match the same message. Are some exemptions too broad?")
            print("")
            print("Line: '" + stripped + "'")
            print("")
            print(str(len(matching_exemptions)) + " Matching exemptions:")
            for exemption_text in matching_exemptions:
                print("'" + exemption_text + "'")
            sys.exit(1)
        if len(matching_exemptions) < 1:
            result.append(line)
    return result


def collapse_consecutive_blank_lines(lines):
    """Collapses runs of blank lines into one, and drops leading blank lines."""
    result = []
    prev_blank = True
    for line in lines:
        if line.strip() == "":
            if not prev_blank:
                result.append(line)
            prev_blank = True
        else:
            result.append(line)
            prev_blank = False
    return result


def remove_trailing_blank_lines(lines):
    """Removes blank lines from the end of <lines>, in place, and returns <lines>."""
    while len(lines) > 0 and lines[-1].strip() == "":
        del lines[-1]
    return lines


def extract_task_name(line):
    """Returns the task name from a '> Task <name>' line, or None for other lines."""
    prefix = "> Task "
    if line.startswith(prefix):
        return line[len(prefix):].strip()
    return None


def is_task_line(line):
    """Returns True if this log line announces a Gradle task."""
    return extract_task_name(line) is not None


def extract_task_names(lines):
    """Returns the unique task names announced in <lines>, in first-seen order."""
    names = []
    for line in lines:
        name = extract_task_name(line)
        if name is not None and name not in names:
            names.append(name)
    return names


# If a task has no output (or only blank output), this function removes the task (and its output)
# For example, turns this:
#   > Task :a
#   > Task :b
#   some message
#
# into this:
#
#   > Task :b
#   some message
def collapse_tasks_having_no_output(lines):
    result = []
    # When we see a task name, we might not emit it if it doesn't have any output
    # This variable is that pending task name, or none if we have no pending task
    pending_task = None
    pending_blanks = []
    for line in lines:
        is_section = is_task_line(line) or line.startswith("> Configure project ") or line.startswith("FAILURE: Build failed with an exception.")
        if is_section:
            pending_task = line
            pending_blanks = []
        elif line.strip() == "":
            # If we have a pending task and we found a blank line, then hold the blank line,
            # and only output it if we later find some nonempty output
            if pending_task is not None:
                pending_blanks.append(line)
            else:
                result.append(line)
        else:
            # We found some nonempty output, now we emit any pending task names
            if pending_task is not None:
                result.append(pending_task)
                result += pending_blanks
                pending_task = None
                pending_blanks = []
            result.append(line)
    return result


# Removes color characters and other ANSI control characters from this input
control_character_regex = re.compile(r"""
    \x1B  # Escape
    (?:   # 7-bit C1 Fe (except CSI)
        [@-Z\\-_]
    |     # or [ for CSI, followed by a control sequence
        \[
        [0-?]*  # Parameters
        [ -/]*  # Intermediate bytes
        [@-~]   # End
    )
    """, re.VERBOSE)


def remove_control_characters(line):
    """Strips ANSI escape sequences (colors etc.) from <line>."""
    return control_character_regex.sub("", line)


# Removes strings from the input wherever they are found
# This list is less convenient than the .ignore files:
#   This list doesn't get autosuggested additions
#   This list isn't automatically garbage collected
#   Users interested in seeing the exemption history probably won't think to look here
# This list does allow removing part of the text from a line and still validating the remainder of the line
# If this list eventually gets long we might want to make it easier to update
inline_ignores_regex = re.compile(
    # b/300072778
    "Sharing is only supported for boot loader classes because bootstrap classpath has been appended"
)


def remove_inline_ignores(line):
    """Deletes inline-ignored substrings from <line>, keeping the rest for validation."""
    return inline_ignores_regex.sub("", line)


# Normalizes some filepaths to more easily simplify/skip some messages
def normalize_paths(lines):
    # get OUT_DIR, DIST_DIR, and the path of the root of the checkout
    out_dir = None
    dist_dir = None
    checkout_dir = None
    gradle_user_home = None
    # we read checkout_root from the log file in case this build was run in a location,
    # such as on a build server
    out_marker = "OUT_DIR="
    dist_marker = "DIST_DIR="
    checkout_marker = "CHECKOUT="
    gradle_user_home_marker = "GRADLE_USER_HOME="
    for line in lines:
        if line.startswith(out_marker):
            out_dir = line.split(out_marker)[1].strip()
            continue
        if line.startswith(dist_marker):
            dist_dir = line.split(dist_marker)[1].strip()
            continue
        if line.startswith(checkout_marker):
            checkout_dir = line.split(checkout_marker)[1].strip()
            continue
        if line.startswith(gradle_user_home_marker):
            gradle_user_home = line.split(gradle_user_home_marker)[1].strip()
            continue
        if out_dir is not None and dist_dir is not None and checkout_dir is not None and gradle_user_home is not None:
            break

    # Remove any mentions of these paths, and replace them with consistent values
    # Make sure to put these paths in the correct order so that more-specific paths will
    # be matched first
    remove_paths = collections.OrderedDict()
    if gradle_user_home is not None:
        remove_paths[gradle_user_home] = "$GRADLE_USER_HOME"
    if dist_dir is not None:
        remove_paths[dist_dir] = "$DIST_DIR"
    if out_dir is not None:
        remove_paths[out_dir] = "$OUT_DIR"
    if checkout_dir is not None:
        remove_paths[checkout_dir + "/frameworks/support"] = "$SUPPORT"
        remove_paths[checkout_dir] = "$CHECKOUT"
    result = []
    for line in lines:
        for path in remove_paths:
            if path in line:
                replacement = remove_paths[path]
                line = line.replace(path + "/", replacement + "/")
                line = line.replace(path, replacement)
        result.append(line)
    return result


# Given a regex with hashes in it like ".gradle/caches/transforms-2/files-2.1/73f631f487bd87cfd8cb2aabafbac6a8",
# tries to return a more generalized regex like ".gradle/caches/transforms-2/files-2.1/[0-9a-f]{32}"
def generalize_hashes(message):
    hash_matcher = "[0-9a-f]{32}"
    # the pattern doubles as its own replacement text: any concrete hash becomes the pattern
    return re.sub(hash_matcher, hash_matcher, message)


# Given a regex with numbers in it like ".gradle/caches/transforms-2/files-2.1/73f631f487bd87cfd8cb2aabafbac6a8"
# tries to return a more generalized regex like ".gradle/caches/transforms-[0-9]*/files-[0-9]*.[0-9]*/73f631f487bd87cfd8cb2aabafbac6a8"
def generalize_numbers(message):
    matcher = "[0-9]+"
    generalized = re.sub(matcher, matcher, message)
    # the above replacement corrupts strings of the form "[0-9a-f]{32}", so we fix them before returning
    return generalized.replace("[[0-9]+-[0-9]+a-f]{[0-9]+}", "[0-9a-f]{32}")


# Given a list of output messages and a list of existing exemption lines,
# generates a new list of exemption lines
def generate_suggested_exemptions(messages, config_lines, remove_unmatched_lines):
    new_config = suggest_missing_exemptions(messages, config_lines)
    if remove_unmatched_lines:
        new_config = remove_unmatched_exemptions(messages, new_config)
    return new_config


# Given a list of output messages and a list of existing exemption lines,
# generates an augmented list of exemptions containing any necessary new exemptions
def suggest_missing_exemptions(messages, config_lines):
    # given a message, finds the index of the existing exemption for that message, if any
    existing_matcher = build_exemptions_code_matcher(config_lines)
    # the index of the previously matched exemption
    previous_found_index = -1
    # map from line index to list of lines to insert there
    insertions_by_position = collections.defaultdict(list)
    insertions_by_task_name = collections.OrderedDict()
    # current task generating any subsequent output
    pending_task_line = None
    # new, suggested exemptions
    new_suggestions = set()
    # generate new suggestions
    for line in messages:
        line = line.strip()
        if line == "":
            continue
        # save task name
        is_section = False
        if is_task_line(line) or line.startswith("> Configure project "):
            # If a task creates output, we record its name
            line = "# " + line
            pending_task_line = line
            is_section = True
        # determine where to put task name
        current_found_index = existing_matcher.index_first_matching_regex(line)
        if current_found_index is not None:
            # We already have a mention of this line
            # We don't need to exempt it again, but this informs where to insert our next exemption
            previous_found_index = current_found_index
            pending_task_line = None
            continue
        # skip outputting task names for tasks that don't output anything
        if is_section:
            continue

        # escape message
        escaped = re.escape(line)
        escaped = escaped.replace("\\ ", " ")  # spaces don't need to be escaped
        escaped = generalize_hashes(escaped)
        escaped = generalize_numbers(escaped)
        # confirm that we haven't already inserted this message
        if escaped in new_suggestions:
            continue
        # insert this regex into an appropriate position
        if pending_task_line is not None:
            # We know which task this line came from, and it's a task that didn't previously make output
            if pending_task_line not in insertions_by_task_name:
                insertions_by_task_name[pending_task_line] = []
            insertions_by_task_name[pending_task_line].append(escaped)
        else:
            # This line of output didn't come from a new task
            # So we append it after the previous line that we found
            insertions_by_position[previous_found_index].append(escaped)
        new_suggestions.add(escaped)

    # for each regex for which we chose a position in the file, insert it there
    exemption_lines = []
    for i in range(len(existing_matcher.regex_texts)):
        exemption_lines.append(existing_matcher.regex_texts[i])
        if i in insertions_by_position:
            exemption_lines += insertions_by_position[i]
    # for regexes that could not be assigned to a task, insert them next
    if -1 in insertions_by_position:
        exemption_lines += insertions_by_position[-1]
    # for regexes that were simply assigned to certain task names, insert them there, grouped by task
    for task_name in insertions_by_task_name:
        exemption_lines.append(task_name)
        exemption_lines += insertions_by_task_name[task_name]
    return exemption_lines


# Searches for config lines in <config_lines> that match no line in <messages>
# Create and returns a new list of config lines, which excludes unmatched lines and
# any corresponding comments
def remove_unmatched_exemptions(messages, config_lines):
    existing_matcher = build_exemptions_matcher(config_lines)
    matched_config_lines = set()
    # find all of the regexes that match at least one message
    for line in messages:
        line = line.strip()
        if line.startswith("#"):
            continue
        for regex in existing_matcher.get_matching_regexes(line):
            matched_config_lines.add(regex)
    # generate a new list of config lines
    # keep config lines that were matched in the list of messages
    # keep comments where there remains a matched config line before the next comment
    # skip comments that were previously followed by other config lines that were deleted
    result = []
    pending_comments = []  # comments that we haven't yet decided to keep or not
    found_unused_line_after_comment = False
    for line in config_lines:
        if line.startswith("#"):
            # We found a comment
            if found_unused_line_after_comment:
                # We found an unused config line more recently than the previous comment,
                # and now we've found a new comment.
                if len(pending_comments) > 0:
                    # We also haven't found any used config lines more recently than the previous comment
                    # Presumably these pending comments were intended to describe the lines that we're removing
                    # So, we skip emitting these pending comments too
                    pending_comments = []
            pending_comments.append(line)
            found_unused_line_after_comment = False
            continue
        matched = (line in matched_config_lines)
        if matched:
            # If this config line is being used, then we keep its comments too
            result += pending_comments
            pending_comments = []
            result.append(line)
        else:
            found_unused_line_after_comment = True
    # If there are any comments at the bottom of the file, then keep them too
    if not found_unused_line_after_comment:
        result += pending_comments
    return result


# opens a file and reads the lines in it
def readlines(path):
    # use a context manager so the file is closed even if reading raises
    with open(path) as infile:
        return infile.readlines()


def writelines(path, lines):
    """Writes <lines> (which carry no trailing newlines) to <path>, newline-separated."""
    with open(path, 'w') as destfile:
        destfile.write("\n".join(lines))


def main():
    arguments = parser.parse_args()

    # read each file
    log_paths = arguments.log_path
    all_lines = []
    for log_path in log_paths:
        lines = readlines(log_path)
        lines = [remove_control_characters(line) for line in lines]
        lines = [remove_inline_ignores(line) for line in lines]
        lines = normalize_paths(lines)
        all_lines += lines
    # load configuration
    flake_exemption_regexes = readlines(get_flake_exemptions_path())
    deterministic_exemption_regexes = readlines(get_deterministic_exemptions_path())
    exemption_regexes = flake_exemption_regexes + deterministic_exemption_regexes
    # remove lines we're not interested in
    update = arguments.update or arguments.gc
    validate = update or arguments.validate
    interesting_lines = all_lines
    if not validate:
        print_failing_task_names(interesting_lines)
    interesting_lines = remove_by_regexes(interesting_lines, exemption_regexes, validate)
    interesting_lines = collapse_tasks_having_no_output(interesting_lines)
    interesting_lines = collapse_consecutive_blank_lines(interesting_lines)
    interesting_lines = remove_trailing_blank_lines(interesting_lines)

    # process results
    if update:
        if arguments.gc or len(interesting_lines) != 0:
            update_path = get_deterministic_exemptions_path()
            # filter out any inconsistently observed messages so we don't try to exempt them twice
            all_lines = remove_by_regexes(all_lines, flake_exemption_regexes, validate)
            # update the deterministic exemptions file based on the result
            suggested = generate_suggested_exemptions(all_lines, deterministic_exemption_regexes, arguments.gc)
            writelines(update_path, suggested)
            print("build_log_simplifier.py updated exemptions " + update_path)
    elif validate:
        if len(interesting_lines) != 0:
            print("")
            print("=" * 80)
            print("build_log_simplifier.py: Error: Found " + str(len(interesting_lines)) + " new lines of warning output!")
            print("")
            print("The new output:")
            print("  " + "  ".join(interesting_lines))
            print("")
            print("To reproduce this failure:")
            print("  Try $ ./gradlew -Pandroidx.validateNoUnrecognizedMessages --rerun-tasks " + " ".join(extract_task_names(interesting_lines)))
            print("")
            print("Instructions:")
            print("  If you can fix these messages, do so.")
            print("  If you cannot fix these messages, you may suppress them.")
            print("    To automatically suppress new output from build server builds, run development/build_log_simplifier/update.sh")
            print("  See also https://android.googlesource.com/platform/frameworks/support/+/androidx-main/development/build_log_simplifier/VALIDATION_FAILURE.md")
            print("")
            new_exemptions_path = log_paths[0] + ".ignore"
            # filter out any inconsistently observed messages so we don't try to exempt them twice
            all_lines = remove_by_regexes(all_lines, flake_exemption_regexes, validate)
            # update deterministic exemptions file based on the result
            suggested = generate_suggested_exemptions(all_lines, deterministic_exemption_regexes, arguments.gc)
            writelines(new_exemptions_path, suggested)
            print("Files:")
            print("  Full Log                   : " + ",".join(log_paths))
            print("  Baseline                   : " + get_deterministic_exemptions_path())
            print("  Autogenerated new baseline : " + new_exemptions_path)
            sys.exit(1)
    else:
        interesting_lines = shorten_uninteresting_stack_frames(interesting_lines)
        print("".join(interesting_lines))


if __name__ == "__main__":
    main()