#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Applies edits generated by a clang tool that was run on Chromium code.

Synopsis:

  cat run_tool.out | extract_edits.py | apply_edits.py <build dir> <filters...>

For example - to apply edits only to WTF sources:

  ... | apply_edits.py out/gn third_party/WebKit/Source/wtf

In addition to filters specified on the command line, the tool also skips edits
that apply to files that are not covered by git.

Input format:

Each line read from stdin describes one edit, with five fields separated by
':::':

  edit_type:::path:::offset:::length:::replacement

|offset| and |length| are integers that index into the raw bytes of the file.
NUL characters inside |replacement| stand for newlines. |path| may be relative
to the build directory.
"""

import argparse
import collections
import functools
import multiprocessing
import os
import os.path
import subprocess
import sys

script_dir = os.path.dirname(os.path.realpath(__file__))
tool_dir = os.path.abspath(os.path.join(script_dir, '../pylib'))
sys.path.insert(0, tool_dir)

from clang import compile_db

Edit = collections.namedtuple('Edit',
                              ('edit_type', 'offset', 'length', 'replacement'))


def _ToNativeStr(data):
  """Returns |data| as the interpreter's native str type.

  On Python 3 subprocess output is bytes, while paths parsed from stdin are
  str; converting keeps the two comparable. On Python 2 str already is the
  byte string type, so the value is returned untouched.
  """
  if isinstance(data, str):
    return data
  return data.decode('utf-8')


def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.

  Returns:
    A list with the os.path.realpath() of every path printed by
    'git ls-files'.
  """
  args = []
  if sys.platform == 'win32':
    args.append('git.bat')
  else:
    args.append('git')
  args.append('ls-files')
  if paths:
    args.extend(paths)
  command = subprocess.Popen(args, stdout=subprocess.PIPE)
  output, _ = command.communicate()
  if command.returncode != 0:
    # An empty file list makes every edit get filtered out later on, so make
    # the failure visible instead of silently applying nothing.
    sys.stderr.write('Warning: "%s" exited with code %d\n' %
                     (' '.join(args), command.returncode))
  # Split first and convert each entry afterwards so that line splitting
  # follows byte-string rules on every Python version.
  return [os.path.realpath(_ToNativeStr(p)) for p in output.splitlines()]


def _ParseEditsFromStdin(build_directory):
  """Extracts the list of edits from stdin.

  The expected format is documented at the top of this file. Lines that cannot
  be parsed, and edits that refer to files that do not exist, are reported on
  stderr and skipped.

  Args:
    build_directory: Directory that contains the compile database. Used to
      normalize the filenames.

  Returns:
    A dictionary mapping filenames to the associated edits.
  """
  # Cache of raw path -> resolved path (or None when the file is missing), so
  # that each distinct path hits the filesystem and is reported only once.
  path_to_resolved_path = {}

  def _ResolvePath(path):
    if path in path_to_resolved_path:
      return path_to_resolved_path[path]

    if not os.path.isfile(path):
      # Not a file as given: interpret it relative to the build directory.
      resolved_path = os.path.realpath(os.path.join(build_directory, path))
    else:
      resolved_path = path

    if not os.path.isfile(resolved_path):
      sys.stderr.write('Edit applies to a non-existent file: %s\n' % path)
      resolved_path = None

    path_to_resolved_path[path] = resolved_path
    return resolved_path

  edits = collections.defaultdict(list)
  for line in sys.stdin:
    line = line.rstrip("\n\r")
    try:
      # The replacement is the last field and may itself contain ':::', hence
      # the split limit. Too few fields or non-integer offset/length raise
      # ValueError.
      edit_type, path, offset, length, replacement = line.split(':::', 4)
      replacement = replacement.replace('\0', '\n')
      path = _ResolvePath(path)
      if not path:
        continue
      edits[path].append(Edit(edit_type, int(offset), int(length), replacement))
    except ValueError:
      sys.stderr.write('Unable to parse edit: %s\n' % line)
  return edits


def _ApplyEditsToSingleFile(filename, edits):
  """Applies |edits| to |filename| in place.

  Args:
    filename: Path of the file to rewrite.
    edits: A list of Edit instances for that file. The list is sorted in place.

  Returns:
    A tuple (edit_count, error_count): the number of edits applied and the
    number of conflicting edits that were skipped.
  """
  # Sort the edits and iterate through them in reverse order. Sorting allows
  # duplicate edits to be quickly skipped, while reversing means that
  # subsequent edits don't need to have their offsets updated with each edit
  # applied.
  # NOTE(review): Edit tuples compare by edit_type before offset, so the
  # reverse-offset ordering only holds among edits of the same type - confirm
  # that inputs never mix edit types for one file.
  edit_count = 0
  error_count = 0
  edits.sort()
  last_edit = None
  with open(filename, 'rb+') as f:
    contents = bytearray(f.read())
    for edit in reversed(edits):
      if edit == last_edit:
        continue
      if (last_edit is not None and edit.edit_type == last_edit.edit_type and
          edit.offset == last_edit.offset and edit.length == last_edit.length):
        # Same range as the previously applied edit but a different
        # replacement: keep the one already applied and report this one.
        sys.stderr.write(
            'Conflicting edit: %s at offset %d, length %d: "%s" != "%s"\n' %
            (filename, edit.offset, edit.length, edit.replacement,
             last_edit.replacement))
        error_count += 1
        continue

      last_edit = edit
      # A bytearray slice only accepts bytes-like values; on Python 3 the
      # replacement parsed from stdin is text and has to be encoded first.
      replacement = edit.replacement
      if not isinstance(replacement, bytes):
        replacement = replacement.encode('utf-8')
      contents[edit.offset:edit.offset + edit.length] = replacement
      if not edit.replacement:
        _ExtendDeletionIfElementIsInList(contents, edit.offset)
      edit_count += 1
    # Rewrite the file from the start with the edited contents.
    f.seek(0)
    f.truncate()
    f.write(contents)
  return (edit_count, error_count)


def _ApplyEdits(edits):
  """Apply the generated edits.

  Progress is reported on stderr after each file.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.

  Returns:
    The negated number of conflicting edits (0 when there were none).
  """
  edit_count = 0
  error_count = 0
  done_files = 0
  # items() rather than iteritems() so this works on both Python 2 and 3.
  for k, v in edits.items():
    tmp_edit_count, tmp_error_count = _ApplyEditsToSingleFile(k, v)
    edit_count += tmp_edit_count
    error_count += tmp_error_count
    done_files += 1
    percentage = (float(done_files) / len(edits)) * 100
    # '\r' keeps the progress report on a single, continuously updated line.
    sys.stderr.write('Applied %d edits (%d errors) to %d files [%.2f%%]\r' %
                     (edit_count, error_count, done_files, percentage))

  sys.stderr.write('\n')
  return -error_count


_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Extends the range of a deletion if the deleted element was part of a list.

  This rewriter helper makes it easy for refactoring tools to remove elements
  from a list. Even if a matcher callback knows that it is removing an element
  from a list, it may not have enough information to accurately remove the list
  element; for example, another matcher callback may end up removing an adjacent
  list element, or all the list elements may end up being removed.

  With this helper, refactoring tools can simply remove the list element and not
  worry about having to include the comma in the replacement.

  Args:
    contents: A bytearray with the deletion already applied.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  char_before = char_after = None

  # Scan left over whitespace; only the first non-whitespace byte matters.
  left_trim_count = 0
  for byte in reversed(contents[:offset]):
    left_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte in (ord(','), ord(':'), ord('('), ord('{')):
      char_before = chr(byte)
    break

  # Likewise to the right, where only a trailing comma is of interest.
  right_trim_count = 0
  for byte in contents[offset:]:
    right_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == ord(','):
      char_after = chr(byte)
    break

  if char_before:
    if char_after:
      # An element with a comma after it: drop that comma together with the
      # whitespace leading up to it.
      del contents[offset:offset + right_trim_count]
    elif char_before in (',', ':'):
      # No comma after the element: drop the preceding ',' or ':' and the
      # whitespace in between instead.
      del contents[offset - left_trim_count:offset]


def main():
  """Parses the command line, reads edits from stdin and applies them.

  Returns:
    The value returned by _ApplyEdits(), used as the process exit status.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      'build_directory',
      help='path to the build dir (dir that edit paths are relative to)')
  parser.add_argument(
      'path_filter',
      nargs='*',
      help='optional paths to filter what files the tool is run on')
  args = parser.parse_args()

  filenames = set(_GetFilesFromGit(args.path_filter))
  edits = _ParseEditsFromStdin(args.build_directory)
  # Only touch files that git knows about (and that match the path filters).
  return _ApplyEdits(
      {k: v for k, v in edits.items()
       if os.path.realpath(k) in filenames})


if __name__ == '__main__':
  sys.exit(main())