#!/usr/bin/env python2
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 2.7 or Python 3
"""

from __future__ import print_function

import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'

desc = '''
Run clang-format on all lines that differ between the working directory
and <commit>, which defaults to HEAD. Changes are only applied to the working
directory.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: 1-based first line number and number of lines.
Range = collections.namedtuple('Range', 'start, count')


def main():
    """Entry point: format the changed lines and apply or print the result.

    Parses the command line (defaults come from the git configuration),
    computes the lines that differ from the chosen commit, runs clang-format
    on those lines and then either prints a diff (--diff) or writes the
    result into the working directory.
    """
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'js',  # JavaScript
        'ts',  # TypeScript
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present.  However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so a single signed verbosity level is used below.
    opts.verbose -= opts.quiet
    del opts.quiet

    commit, files = interpret_args(opts.args, dash_dash, opts.commit)
    changed_lines = compute_diff_and_extract_lines(commit, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print('    %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('    %s' % filename)
    if not changed_lines:
        # Nothing to format: stop before creating any temporary trees.
        if opts.verbose >= 0:
            print('no modified files to format')
        return
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print('    %s' % filename)


def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless in `non_string_options`,
    which is a dictionary mapping option name (in lower case) to either
    "--bool" or "--int"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if entry:
            # Entries look like "name\nvalue"; a key that was set without a
            # value is listed as just "name", so do not require the newline.
            name, _, value = entry.partition('\n')
            if name in non_string_options:
                value = run('git', 'config', non_string_options[name], name)
            out[name] = value
    return out


def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commit] [--] [files...]" and return (commit, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
    left (if present) is taken as commit.  Otherwise, the first argument is
    checked if it is a commit or a file.  If commit is not given,
    `default_commit` is used."""
    if dash_dash:
        if len(args) == 0:
            commit = default_commit
        elif len(args) > 1:
            die('at most one commit allowed; %d given' % len(args))
        else:
            commit = args[0]
        object_type = get_object_type(commit)
        if object_type not in ('commit', 'tag'):
            if object_type is None:
                die("'%s' is not a commit" % commit)
            else:
                die("'%s' is a %s, but a commit was expected" %
                    (commit, object_type))
        files = dash_dash[1:]
    elif args:
        if disambiguate_revision(args[0]):
            commit = args[0]
            files = args[1:]
        else:
            commit = default_commit
            files = args
    else:
        commit = default_commit
        files = []
    return commit, files


def disambiguate_revision(value):
    """Returns True if `value` is a revision, False if it is a file, or dies."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))


def get_object_type(value):
    """Returns a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ['git', 'cat-file', '-t', value]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only so that git's complaint is not shown.
    stdout, _ = p.communicate()
    if p.returncode != 0:
        return None
    return to_string(stdout.strip())


def compute_diff_and_extract_lines(commit, files):
    """Calls compute_diff() followed by extract_lines().

    Exits with status 2 if the diff command fails."""
    diff_process = compute_diff(commit, files)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines


def compute_diff(commit, files):
    """Return a subprocess object producing the diff from `commit`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory and `commit`, filtered on
    `files` (if non-empty).  Zero context lines are used in the patch."""
    cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    p.stdin.close()
    return p


def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is an iterable of patch lines (str or bytes).  The input must
    have been produced with ``-U0``, meaning unidiff format with zero lines of
    context.

    The return value is a dict mapping filename to a list of line `Range`s.
    Hunks that add no lines, and hunks seen before any ``+++`` file header,
    are skipped."""
    matches = {}
    filename = None
    for line in patch_file:
        line = to_string(line)
        match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
        if match:
            filename = match.group(1).rstrip('\r\n')
        match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
        if match and filename is not None:
            start_line = int(match.group(1))
            # A missing ",<count>" in a hunk header means one line.
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches


def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period.  `dictionary` is modified in place."""
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys because entries are deleted below.
    for filename in list(dictionary.keys()):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]


def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)


def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, '--stdin')


def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
                                      style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps filename to the list of line `Range`s to format.

    Returns the object ID (SHA-1) of the created tree."""
    def index_info_generator():
        for filename, line_ranges in changed_lines.items():
            mode = oct(os.stat(filename).st_mode)
            # Python 3 spells octal as '0o100644'; normalise it to the
            # '0100644' form that Python 2 produced.
            if mode.startswith('0o'):
                mode = '0' + mode[2:]
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           binary=binary, style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)
    return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames.  If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other
    mode is invalid.

    Returns the object ID of the created tree."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # '-z' makes update-index expect NUL-terminated records.
            p.stdin.write(to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id


def clang_format_to_blob(filename, line_ranges, binary='clang-format',
                         style=None):
    """Run clang-format on the given file and save the result to a git blob.

    Only the lines covered by `line_ranges` are passed to clang-format for
    formatting.  The formatter's output is piped straight into
    "git hash-object -w".

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary, filename]
    if style:
        clang_format_cmd.extend(['-style=' + style])
    clang_format_cmd.extend([
        '-lines=%s:%s' % (start_line, start_line + line_count - 1)
        for start_line, line_count in line_ranges])
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE)
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    clang_format.stdin.close()
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    # hash-object holds its own copy of the pipe; drop ours.
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    return to_string(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward.

    The previous value of GIT_INDEX_FILE (or its absence) is restored on
    exit."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)


def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in.  Otherwise, an
    empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output=' + path, tree)
    return path


def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`.  If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of files that differ between the two trees."""
    changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only',
                        old_tree, new_tree).rstrip('\0').split('\0')
    if not force:
        unstaged_files = run('git', 'diff-files', '--name-status',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.',
                  file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree".  Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files


def run(*args, **kwargs):
    """Run the command `args` and return its standard output as a string.

    Keyword arguments:
      stdin:   text fed to the command's standard input (default: empty).
      verbose: if true (default), announce which command wrote to stderr or
               failed; the command's stderr text itself is always forwarded.
      strip:   if true (default), strip trailing newlines from the output.

    Exits with status 2 if the command returns a non-zero status."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # The pipes are binary: encode what goes in and decode what comes out.
    stdout, stderr = p.communicate(input=to_bytes(stdin))
    stdout = to_string(stdout)
    stderr = to_string(stderr)
    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)


def die(message):
    """Print `message` to stderr as an error and exit with status 2."""
    print('error: %s' % message, file=sys.stderr)
    sys.exit(2)


def to_bytes(data):
    """Return `data` as a byte string, encoding text as UTF-8."""
    if isinstance(data, bytes):
        return data
    return data.encode('utf-8')


def to_string(data):
    """Return `data` as the interpreter's native `str` type.

    Native strings are returned unchanged.  Python 3 bytes are decoded as
    UTF-8 (undecodable bytes are replaced); Python 2 unicode is encoded as
    UTF-8."""
    if isinstance(data, str):
        return data
    if isinstance(data, bytes):
        return data.decode('utf-8', 'replace')
    return data.encode('utf-8')


if __name__ == '__main__':
    main()