#!/usr/bin/env python3
#
#  Copyright (C) 2016 The Android Open Source Project
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
import argparse
import collections
import os
import re
import sys

dir_of_this_script = os.path.dirname(os.path.realpath(__file__))

# Note: argparse's default formatter collapses and re-wraps whitespace in the
# description and help texts, so their layout in this file is purely cosmetic.
parser = argparse.ArgumentParser(
    description="""USAGE:
    Simplifies a build.log from hundreds of megabytes to <100 lines. Prints output to terminal.
    Pass this script a filepath to parse. You should be able to type "python3 build_log_simplifier.py"
    And then drag-and-drop a log file onto the terminal window to get its path.

    Sample usage: python3 development/build_log_simplifier.py Users/owengray/Desktop/build.log
    """)
parser.add_argument("--validate", action="store_true",
    help="Validate that no unrecognized messages exist in the given log")
parser.add_argument("--update", action="store_true",
    help="Update our list of recognized messages to include all messages from the given log")
parser.add_argument("--gc", action="store_true",
    help="When generating a new exemptions file, exclude any exemptions that were not found in the given log. Only relevant with --update or --validate")
parser.add_argument("log_path", help="Filepath of log(s) to process", nargs="+")


# a regexes_matcher can quickly identify which of a set of regexes matches a given text
#
# It lazily compiles one composite regex (an alternation of all of its regexes) and, when
# there is more than one regex, lazily splits its regexes among child matchers so that a
# query only has to descend into the children whose composite regex matches.
class regexes_matcher(object):
    # regexes: list of regex source strings; each must fully match a text to count as a match
    def __init__(self, regexes):
        self.regex_texts = regexes
        # list of child regexes_matcher, created on demand by ensure_split()
        self.children = None
        # compiled composite regex, created on demand by matches()
        self.matcher = None

    # returns a list of regexes that match the given text
    # expect_match: if True, we skip testing our own composite regex and go straight to
    #   our children (this is faster when the caller expects that something matches)
    def get_matching_regexes(self, text, expect_match=True):
        has_several = len(self.regex_texts) > 1
        if expect_match and has_several:
            # If we already expect our matcher to match, we can directly jump to asking our children
            return self.query_children_for_matching_regexes(text)
        # It takes more time to match lots of regexes than to match one composite regex
        # So, we try to match one composite regex first
        if not self.matches(text):
            # Our composite regex yielded no matches
            return []
        if has_several:
            # At least one child regex matches, so we have to determine which ones
            return self.query_children_for_matching_regexes(text)
        return self.regex_texts

    # queries our children for regexes that match
    def query_children_for_matching_regexes(self, text):
        # Create children if they don't yet exist
        self.ensure_split()
        # query children and join their results
        results = []
        for child in self.children:
            results += child.get_matching_regexes(text, False)
        return results

    # Returns the index of the first regex matching this string, or None if not found
    def index_first_matching_regex(self, text):
        num_regexes = len(self.regex_texts)
        if num_regexes == 0:
            return None
        if not self.matches(text):
            return None
        if num_regexes == 1:
            return 0
        self.ensure_split()
        # number of regexes owned by the children that we have already ruled out
        count = 0
        for child in self.children:
            child_index = child.index_first_matching_regex(text)
            if child_index is not None:
                return count + child_index
            count += len(child.regex_texts)
        return None

    # Create children if they don't yet exist
    def ensure_split(self):
        if self.children is not None:
            return
        # It takes more time to compile a longer regex, but it also takes more time to
        # test lots of small regexes.
        # In practice, this number of children seems to result in fast execution
        num_regexes = len(self.regex_texts)
        num_children = min(num_regexes, 32)
        self.children = []
        child_start = 0
        for i in range(num_children):
            # exact integer arithmetic: split our regexes into nearly equal consecutive slices
            child_end = num_regexes * (i + 1) // num_children
            self.children.append(regexes_matcher(self.regex_texts[child_start:child_end]))
            child_start = child_end

    # Returns a match object if any of our regexes matches the entire text, or None otherwise
    def matches(self, text):
        if self.matcher is None:
            full_regex_text = "(?:" + ")|(?:".join(self.regex_texts) + ")"
            self.matcher = re.compile(full_regex_text)
        return self.matcher.fullmatch(text)


# Matches the line that reports a failed task and captures the quoted task name
failing_task_regex = re.compile(r"Execution failed for task '([^']*)'")


# Returns the names of the tasks that the given log lines report as having failed, in order.
# The name is read from between the quotes, so this doesn't depend on what follows the
# closing quote (for example, a final line having no trailing newline)
def find_failing_task_names(lines):
    names = []
    for line in lines:
        found = failing_task_regex.match(line)
        if found is not None:
            names.append(found.group(1))
    return names


# Prints the names of the tasks that the given log lines report as having failed
def print_failing_task_names(lines):
    tasks_of_interest = find_failing_task_names(lines)
    print("Detected these failing tasks: " + str(tasks_of_interest))


# Collapses each run of consecutive non-androidx stack frames into its first frame,
# and marks that frame with "..." to show that something was omitted
def shorten_uninteresting_stack_frames(lines):
    result = []
    prev_line_is_boring = False
    for line in lines:
        is_boring = line.startswith("\tat ") and not line.startswith("\tat androidx")
        if not is_boring:
            result.append(line)
        elif not prev_line_is_boring:
            # first non-androidx stack frame of this run
            result.append(line.replace("\n", "...\n"))
        prev_line_is_boring = is_boring
    return result


# Returns the path of the config file holding exemptions for deterministic/consistent output.
# These exemptions can be garbage collected via the `--gc` argument
def get_deterministic_exemptions_path():
    return os.path.join(dir_of_this_script, "messages.ignore")


# Returns the path of the config file holding exemptions for nondeterministic/flaky output.
# These exemptions will not be garbage collected via the `--gc` argument
def get_flake_exemptions_path():
    return os.path.join(dir_of_this_script, "message-flakes.ignore")


# Returns a regexes_matcher that matches what is described by our config file
# Ignores comments and ordering in our config file
# Raises an Exception if a config line contains ANSI control characters
def build_exemptions_matcher(config_lines):
    regexes = []
    for config_line in config_lines:
        line = config_line.replace("\n", "").strip()
        if line.startswith("#") or line == "":
            # skip comments
            continue
        if remove_control_characters(line) != line:
            raise Exception("Unexpected control characters found in configuration line:\n\n " +
                "'" + line + "'\n\n. This line is unexpected to match anything. Is this a copying mistake?")
        regexes.append(line)
    return regexes_matcher(sorted(regexes))


# Returns a regexes_matcher that matches the content of our config file
# Can match comments
# Respects ordering in the config
# This is used for editing the config file itself
def build_exemptions_code_matcher(config_lines):
    stripped_lines = [line.strip() for line in config_lines]
    regexes = [line for line in stripped_lines if line != ""]
    return regexes_matcher(regexes)


# Returns the given lines except for those that are matched by an exemption in config_lines.
# validate_no_duplicates: if True, and if more than one exemption matches the same line, then
#   we print an explanation and exit with status 1
def remove_by_regexes(lines, config_lines, validate_no_duplicates):
    fast_matcher = build_exemptions_matcher(config_lines)
    result = []
    for line in lines:
        stripped = line.strip()
        matching_exemptions = fast_matcher.get_matching_regexes(stripped, expect_match=True)
        if validate_no_duplicates and len(matching_exemptions) > 1:
            print("")
            print("build_log_simplifier.py: Invalid configuration: multiple message exemptions match the same message. Are some exemptions too broad?")
            print("")
            print("Line: '" + stripped + "'")
            print("")
            print(str(len(matching_exemptions)) + " Matching exemptions:")
            for exemption_text in matching_exemptions:
                print("'" + exemption_text + "'")
            sys.exit(1)
        if len(matching_exemptions) < 1:
            result.append(line)
    return result


# Removes leading blank lines, and shortens each later run of blank lines to one blank line
def collapse_consecutive_blank_lines(lines):
    result = []
    # starting as True causes us to drop any blank lines at the beginning
    prev_blank = True
    for line in lines:
        is_blank = line.strip() == ""
        if not (is_blank and prev_blank):
            result.append(line)
        prev_blank = is_blank
    return result


# Removes any blank lines from the end of the given list.
# Modifies the given list in place and also returns it
def remove_trailing_blank_lines(lines):
    while len(lines) > 0 and lines[-1].strip() == "":
        del lines[-1]
    return lines


# Returns the task name if this line announces a task ("> Task <name>"), or None otherwise
def extract_task_name(line):
    prefix = "> Task "
    if line.startswith(prefix):
        return line[len(prefix):].strip()
    return None


def is_task_line(line):
    return extract_task_name(line) is not None


# Returns the distinct task names announced in the given lines, in order of first appearance
def extract_task_names(lines):
    names = []
    for line in lines:
        name = extract_task_name(line)
        if name is not None and name not in names:
            names.append(name)
    return names


# If a task has no output (or only blank output), this function removes the task (and its output)
# For example, turns this:
#  > Task :a
#  > Task :b
#  some message
#
# into this:
#
#  > Task :b
#  some message
def collapse_tasks_having_no_output(lines):
    result = []
    # When we see a task name, we might not emit it if it doesn't have any output
    # This variable is that pending task name, or none if we have no pending task
    pending_task = None
    pending_blanks = []
    for line in lines:
        is_section = (is_task_line(line)
            or line.startswith("> Configure project ")
            or line.startswith("FAILURE: Build failed with an exception."))
        if is_section:
            pending_task = line
            pending_blanks = []
        elif line.strip() == "":
            # If we have a pending task and we found a blank line, then hold the blank line,
            # and only output it if we later find some nonempty output
            if pending_task is not None:
                pending_blanks.append(line)
            else:
                result.append(line)
        else:
            # We found some nonempty output, now we emit any pending task names
            if pending_task is not None:
                result.append(pending_task)
                result += pending_blanks
                pending_task = None
                pending_blanks = []
            result.append(line)
    return result


# Removes color characters and other ANSI control characters from this input
control_character_regex = re.compile(r"""
        \x1B  # Escape
        (?:   # 7-bit C1 Fe (except CSI)
            [@-Z\\-_]
        |     # or [ for CSI, followed by a control sequence
            \[
            [0-?]*  # Parameters
            [ -/]*  # Intermediate bytes
            [@-~]   # End
        )
        """, re.VERBOSE)


def remove_control_characters(line):
    return control_character_regex.sub("", line)


# Removes strings from the input wherever they are found
# This list is less convenient than the .ignore files:
#   This list doesn't get autosuggested additions
#   This list isn't automatically garbage collected
#   Users interested in seeing the exemption history probably won't think to look here
# This list does allow removing part of the text from a line and still validating the remainder of the line
# If this list eventually gets long we might want to make it easier to update
inline_ignores_regex = re.compile(
    # b/300072778
    "Sharing is only supported for boot loader classes because bootstrap classpath has been appended"
)


def remove_inline_ignores(line):
    return inline_ignores_regex.sub("", line)


# Normalizes some filepaths to more easily simplify/skip some messages
# Replaces mentions of the directories that the log declares via "OUT_DIR=", "DIST_DIR=",
# "CHECKOUT=" and "GRADLE_USER_HOME=" with "$OUT_DIR", "$DIST_DIR", "$CHECKOUT" (or "$SUPPORT"
# for <checkout>/frameworks/support) and "$GRADLE_USER_HOME".
# Returns a new list of lines
def normalize_paths(lines):
    # get OUT_DIR, DIST_DIR, GRADLE_USER_HOME and the path of the root of the checkout
    # we read these from the log file in case this build was run somewhere else,
    # such as on a build server
    out_marker = "OUT_DIR="
    dist_marker = "DIST_DIR="
    checkout_marker = "CHECKOUT="
    gradle_user_home_marker = "GRADLE_USER_HOME="
    markers = (out_marker, dist_marker, checkout_marker, gradle_user_home_marker)
    # map from marker to the most recent value declared for it
    found = {}
    for line in lines:
        marker = next((m for m in markers if line.startswith(m)), None)
        if marker is not None:
            found[marker] = line.split(marker)[1].strip()
            continue
        if len(found) == len(markers):
            # we know everything that we were looking for
            break
    out_dir = found.get(out_marker)
    dist_dir = found.get(dist_marker)
    checkout_dir = found.get(checkout_marker)
    gradle_user_home = found.get(gradle_user_home_marker)

    # Remove any mentions of these paths, and replace them with consistent values
    # Make sure to put these paths in the correct order so that more-specific paths will
    # be matched first
    # We skip empty values: an empty string is found in every line, and replacing it would
    # insert the placeholder between every pair of characters
    remove_paths = collections.OrderedDict()
    if gradle_user_home:
        remove_paths[gradle_user_home] = "$GRADLE_USER_HOME"
    if dist_dir:
        remove_paths[dist_dir] = "$DIST_DIR"
    if out_dir:
        remove_paths[out_dir] = "$OUT_DIR"
    if checkout_dir:
        remove_paths[checkout_dir + "/frameworks/support"] = "$SUPPORT"
        remove_paths[checkout_dir] = "$CHECKOUT"

    result = []
    for line in lines:
        for path, replacement in remove_paths.items():
            if path in line:
                line = line.replace(path + "/", replacement + "/")
                line = line.replace(path, replacement)
        result.append(line)
    return result


# Given a regex with hashes in it like ".gradle/caches/transforms-2/files-2.1/73f631f487bd87cfd8cb2aabafbac6a8",
# tries to return a more generalized regex like ".gradle/caches/transforms-2/files-2.1/[0-9a-f]{32}"
def generalize_hashes(message):
    hash_matcher = "[0-9a-f]{32}"
    return re.sub(hash_matcher, hash_matcher, message)


# Given a regex with numbers in it like ".gradle/caches/transforms-2/files-2.1/73f631f487bd87cfd8cb2aabafbac6a8"
# tries to return a more generalized regex in which each run of digits is replaced by "[0-9]+",
# like ".gradle/caches/transforms-[0-9]+/files-[0-9]+.[0-9]+/..."
def generalize_numbers(message):
    matcher = "[0-9]+"
    generalized = re.sub(matcher, matcher, message)
    # the above replacement corrupts strings of the form "[0-9a-f]{32}", so we fix them before returning
    return generalized.replace("[[0-9]+-[0-9]+a-f]{[0-9]+}", "[0-9a-f]{32}")


# Given a list of output messages and a list of existing exemption lines,
# generates a new list of exemption lines
# remove_unmatched_lines: whether to also drop exemptions that match none of the messages
def generate_suggested_exemptions(messages, config_lines, remove_unmatched_lines):
    new_config = suggest_missing_exemptions(messages, config_lines)
    if remove_unmatched_lines:
        new_config = remove_unmatched_exemptions(messages, new_config)
    return new_config


# Given a list of output messages and a list of existing exemption lines,
# generates an augmented list of exemptions containing any necessary new exemptions
def suggest_missing_exemptions(messages, config_lines):
    # given a message, finds the index of the existing exemption for that message, if any
    existing_matcher = build_exemptions_code_matcher(config_lines)
    # the index of the previously matched exemption
    previous_found_index = -1
    # map from line index to list of lines to insert there
    insertions_by_position = collections.defaultdict(list)
    insertions_by_task_name = collections.OrderedDict()
    # current task generating any subsequent output
    pending_task_line = None
    # new, suggested exemptions
    new_suggestions = set()

    # generate new suggestions
    for line in messages:
        line = line.strip()
        if line == "":
            continue
        # save task name
        is_section = False
        if is_task_line(line) or line.startswith("> Configure project "):
            # If a task creates output, we record its name
            line = "# " + line
            pending_task_line = line
            is_section = True
        # determine where to put task name
        current_found_index = existing_matcher.index_first_matching_regex(line)
        if current_found_index is not None:
            # We already have a mention of this line
            # We don't need to exempt it again, but this informs where to insert our next exemption
            previous_found_index = current_found_index
            pending_task_line = None
            continue
        # skip outputting task names for tasks that don't output anything
        if is_section:
            continue

        # escape message
        escaped = re.escape(line)
        escaped = escaped.replace("\\ ", " ")  # spaces don't need to be escaped
        escaped = generalize_hashes(escaped)
        escaped = generalize_numbers(escaped)
        # confirm that we haven't already inserted this message
        if escaped in new_suggestions:
            continue
        # insert this regex into an appropriate position
        if pending_task_line is not None:
            # We know which task this line came from, and it's a task that didn't previously make output
            if pending_task_line not in insertions_by_task_name:
                insertions_by_task_name[pending_task_line] = []
            insertions_by_task_name[pending_task_line].append(escaped)
        else:
            # This line of output didn't come from a new task
            # So we append it after the previous line that we found
            insertions_by_position[previous_found_index].append(escaped)
        new_suggestions.add(escaped)

    # for each regex for which we chose a position in the file, insert it there
    exemption_lines = []
    for i, existing_line in enumerate(existing_matcher.regex_texts):
        exemption_lines.append(existing_line)
        if i in insertions_by_position:
            exemption_lines += insertions_by_position[i]
    # for regexes that could not be assigned to a task, insert them next
    if -1 in insertions_by_position:
        exemption_lines += insertions_by_position[-1]
    # for regexes that were simply assigned to certain task names, insert them there, grouped by task
    for task_name, task_exemptions in insertions_by_task_name.items():
        exemption_lines.append(task_name)
        exemption_lines += task_exemptions
    return exemption_lines


# Searches for config lines in <config_lines> that match no line in <messages>
# Creates and returns a new list of config lines, which excludes unmatched lines and
# any corresponding comments
def remove_unmatched_exemptions(messages, config_lines):
    existing_matcher = build_exemptions_matcher(config_lines)
    matched_config_lines = set()
    # find all of the regexes that match at least one message
    for line in messages:
        line = line.strip()
        if line.startswith("#"):
            continue
        matched_config_lines.update(existing_matcher.get_matching_regexes(line))

    # generate a new list of config lines
    # keep config lines that were matched in the list of messages
    # keep comments where there remains a matched config line before the next comment
    # skip comments that were previously followed by other config lines that were deleted
    result = []
    pending_comments = []  # comments that we haven't yet decided to keep or not
    found_unused_line_after_comment = False
    for line in config_lines:
        if line.startswith("#"):
            # We found a comment
            if found_unused_line_after_comment:
                # We found an unused config line more recently than the previous comment,
                # and now we've found a new comment.
                # Any comments that are still pending have had no used config lines after them.
                # Presumably these pending comments were intended to describe the lines that we're removing
                # So, we skip emitting these pending comments too
                pending_comments = []
            pending_comments.append(line)
            found_unused_line_after_comment = False
            continue
        if line in matched_config_lines:
            # If this config line is being used, then we keep its comments too
            result += pending_comments
            pending_comments = []
            result.append(line)
        else:
            found_unused_line_after_comment = True
    # If there are any comments at the bottom of the file, then keep them too
    if not found_unused_line_after_comment:
        result += pending_comments
    return result


# opens a file and reads the lines in it
def readlines(path):
    with open(path) as infile:
        return infile.readlines()


# writes the given lines to the given path, separated by newlines
def writelines(path, lines):
    with open(path, 'w') as destfile:
        destfile.write("\n".join(lines))


def main():
    arguments = parser.parse_args()

    # read each file
    log_paths = arguments.log_path
    all_lines = []
    for log_path in log_paths:
        lines = readlines(log_path)
        lines = [remove_control_characters(line) for line in lines]
        lines = [remove_inline_ignores(line) for line in lines]
        lines = normalize_paths(lines)
        all_lines += lines

    # load configuration
    flake_exemption_regexes = readlines(get_flake_exemptions_path())
    deterministic_exemption_regexes = readlines(get_deterministic_exemptions_path())
    exemption_regexes = flake_exemption_regexes + deterministic_exemption_regexes

    # remove lines we're not interested in
    update = arguments.update or arguments.gc
    validate = update or arguments.validate
    interesting_lines = all_lines
    if not validate:
        print_failing_task_names(interesting_lines)
    interesting_lines = remove_by_regexes(interesting_lines, exemption_regexes, validate)
    interesting_lines = collapse_tasks_having_no_output(interesting_lines)
    interesting_lines = collapse_consecutive_blank_lines(interesting_lines)
    interesting_lines = remove_trailing_blank_lines(interesting_lines)

    # process results
    if update:
        if arguments.gc or len(interesting_lines) != 0:
            update_path = get_deterministic_exemptions_path()
            # filter out any inconsistently observed messages so we don't try to exempt them twice
            all_lines = remove_by_regexes(all_lines, flake_exemption_regexes, validate)
            # update the deterministic exemptions file based on the result
            suggested = generate_suggested_exemptions(all_lines, deterministic_exemption_regexes, arguments.gc)
            writelines(update_path, suggested)
            print("build_log_simplifier.py updated exemptions " + update_path)
    elif validate:
        if len(interesting_lines) != 0:
            print("")
            print("=" * 80)
            print("build_log_simplifier.py: Error: Found " + str(len(interesting_lines)) + " new lines of warning output!")
            print("")
            print("The new output:")
            print(" " + " ".join(interesting_lines))
            print("")
            print("To reproduce this failure:")
            print(" Try $ ./gradlew -Pandroidx.validateNoUnrecognizedMessages --rerun-tasks " + " ".join(extract_task_names(interesting_lines)))
            print("")
            print("Instructions:")
            print(" If you can fix these messages, do so.")
            print(" If you cannot fix these messages, you may suppress them.")
            print(" To automatically suppress new output from build server builds, run development/build_log_simplifier/update.sh")
            print(" See also https://android.googlesource.com/platform/frameworks/support/+/androidx-main/development/build_log_simplifier/VALIDATION_FAILURE.md")
            print("")
            new_exemptions_path = log_paths[0] + ".ignore"
            # filter out any inconsistently observed messages so we don't try to exempt them twice
            all_lines = remove_by_regexes(all_lines, flake_exemption_regexes, validate)
            # update deterministic exemptions file based on the result
            suggested = generate_suggested_exemptions(all_lines, deterministic_exemption_regexes, arguments.gc)
            writelines(new_exemptions_path, suggested)
            print("Files:")
            print(" Full Log : " + ",".join(log_paths))
            print(" Baseline : " + get_deterministic_exemptions_path())
            print(" Autogenerated new baseline : " + new_exemptions_path)
            sys.exit(1)
    else:
        interesting_lines = shorten_uninteresting_stack_frames(interesting_lines)
        print("".join(interesting_lines))


if __name__ == "__main__":
    main()