#!/usr/bin/env python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 2.7 or Python 3
"""

from __future__ import absolute_import, division, print_function
import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: 1-based `start` line and the number of lines `count`.
Range = collections.namedtuple('Range', 'start, count')


def main():
    """Command-line entry point: parse options, format changed lines, and
    either print a diff or apply the result to the working directory."""
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
        'cu', 'cuh',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
        'cs',  # C Sharp
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present.  However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so a single signed verbosity level is used below.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    validate_commit_count(commits, opts.diff)
    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print(' %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print(' %s' % filename)
    if not changed_lines:
        if opts.verbose >= 0:
            print('no modified files to format')
        return
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    if len(commits) > 1:
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print(' %s' % filename)


def validate_commit_count(commits, diff):
    """Die unless the number of `commits` is acceptable.

    At most two commits may be given, and giving two requires `diff` (the
    --diff option) to be set.  The upper bound is checked first so that it is
    enforced regardless of `diff`."""
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    if len(commits) == 2 and not diff:
        die('--diff is required when two commits are given')


def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless listed in
    `non_string_options`, which is a dictionary mapping option name (in lower
    case) to either "--bool" or "--int"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if entry:
            if '\n' in entry:
                name, value = entry.split('\n', 1)
            else:
                # A setting with no '=' ('\n' with --null) is implicitly 'true'
                name = entry
                value = 'true'
            if name in non_string_options:
                # Let git itself canonicalize the typed value.
                value = run('git', 'config', non_string_options[name], name)
            out[name] = value
    return out


def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    left (if present) are taken as commits.  Otherwise, the arguments are
    checked from left to right if they are commits or files.  If commits are
    not given, a list with `default_commit` is used."""
    if dash_dash:
        commits = list(args) if args else [default_commit]
        for commit in commits:
            object_type = get_object_type(commit)
            if object_type not in ('commit', 'tag'):
                if object_type is None:
                    die("'%s' is not a commit" % commit)
                else:
                    die("'%s' is a %s, but a commit was expected" %
                        (commit, object_type))
        # dash_dash[0] is the '--' separator itself.
        files = dash_dash[1:]
    elif args:
        # Work on a copy so the caller's list is left untouched.
        files = list(args)
        commits = []
        while files and disambiguate_revision(files[0]):
            commits.append(files.pop(0))
        if not commits:
            commits = [default_commit]
    else:
        commits = [default_commit]
        files = []
    return commits, files


def disambiguate_revision(value):
    """Returns True if `value` is a revision, False if it is a file, or dies."""
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    object_type = get_object_type(value)
    if object_type is None:
        return False
    if object_type in ('commit', 'tag'):
        return True
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, object_type))


def get_object_type(value):
    """Returns a string description of an object's type, or None if it is not
    a valid git object."""
    cmd = ['git', 'cat-file', '-t', value]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        return None
    return convert_string(stdout.strip())


def compute_diff_and_extract_lines(commits, files):
    """Calls compute_diff() followed by extract_lines().

    Exits with status 2 if the diff process fails."""
    diff_process = compute_diff(commits, files)
    changed_lines = extract_lines(diff_process.stdout)
    diff_process.stdout.close()
    diff_process.wait()
    if diff_process.returncode != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return changed_lines


def compute_diff(commits, files):
    """Return a subprocess object producing the diff from `commits`.

    The return value's `stdout` file object will produce a patch with the
    differences between the working directory and the first commit if a single
    one was specified, or the difference between both specified commits,
    filtered on `files` (if non-empty).  Zero context lines are used in the
    patch."""
    git_tool = 'diff-index'
    if len(commits) > 1:
        git_tool = 'diff-tree'
    cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
    cmd.extend(files)
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    p.stdin.close()
    return p


def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    The input must have been produced with ``-U0``, meaning unidiff format with
    zero lines of context.  The return value is a dict mapping filename to a
    list of line `Range`s.  Hunks that add no lines are omitted, as are hunk
    headers seen before any "+++" file header."""
    matches = {}
    filename = None
    for line in patch_file:
        line = convert_string(line)
        match = re.search(r'^\+\+\+\ [^/]+/(.*)', line)
        if match:
            filename = match.group(1).rstrip('\r\n')
        match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line)
        if match and filename is not None:
            start_line = int(match.group(1))
            # An omitted count in a hunk header means exactly one line.
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches


def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period.  Names without a period are kept only if the empty
    string is an allowed extension."""
    allowed_extensions = frozenset(allowed_extensions)
    for filename in list(dictionary.keys()):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 and '' in allowed_extensions:
            continue
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]


def cd_to_toplevel():
    """Change to the top level of the git repository."""
    toplevel = run('git', 'rev-parse', '--show-toplevel')
    os.chdir(toplevel)


def create_tree_from_workdir(filenames):
    """Create a new git tree with the given files from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    return create_tree(filenames, '--stdin')


def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps filename to a list of `Range`s.  Files are read from
    `revision` if given, otherwise from the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    def index_info_generator():
        for filename, line_ranges in changed_lines.items():
            if revision:
                git_metadata_cmd = ['git', 'ls-tree',
                                    '%s:%s' % (revision,
                                               os.path.dirname(filename)),
                                    os.path.basename(filename)]
                git_metadata = subprocess.Popen(git_metadata_cmd,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE)
                stdout = git_metadata.communicate()[0]
                fields = stdout.split()
                if git_metadata.returncode != 0 or not fields:
                    die('`%s` failed' % ' '.join(git_metadata_cmd))
                # The first field of an ls-tree entry is the octal file mode.
                mode = int(fields[0], 8)
            else:
                mode = os.stat(filename).st_mode
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            # Emit the mode as octal digits with a single leading zero, the
            # same text on Python 2 and Python 3.
            yield '0%o %s\t%s' % (mode, blob_id, filename)
    return create_tree(index_info_generator(), '--index-info')


def create_tree(input_lines, mode):
    """Create a tree object from the given input.

    If mode is '--stdin', it must be a list of filenames.  If mode is
    '--index-info' it must be a list of values suitable for "git update-index
    --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".  Any other
    mode is invalid.

    Returns the object ID of the written tree."""
    assert mode in ('--stdin', '--index-info')
    cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
        for line in input_lines:
            # Entries are NUL-terminated because of '-z'.
            p.stdin.write(to_bytes('%s\0' % line))
        p.stdin.close()
        if p.wait() != 0:
            die('`%s` failed' % ' '.join(cmd))
        tree_id = run('git', 'write-tree')
        return tree_id


def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Run clang-format on the given file and save the result to a git blob.

    Runs on the file in `revision` if not None, or on the file in the working
    directory if `revision` is None.

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.append('-style=' + style)
    clang_format_cmd.extend(
        '-lines=%s:%s' % (start_line, start_line + line_count - 1)
        for start_line, line_count in line_ranges)
    if revision:
        # The content arrives on stdin, so tell clang-format the file name.
        clang_format_cmd.append('-assume-filename=' + filename)
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        clang_format_cmd.append(filename)
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(clang_format_cmd,
                                        stdin=clang_format_stdin,
                                        stdout=subprocess.PIPE)
        if clang_format_stdin == subprocess.PIPE:
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    # Close our copy of the pipe end handed to (or created for) clang-format.
    clang_format_stdin.close()
    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                   stdout=subprocess.PIPE)
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    if hash_object.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if clang_format.wait() != 0:
        die('`%s` failed' % ' '.join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return convert_string(stdout).rstrip('\r\n')


@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager for setting GIT_INDEX_FILE to a temporary file and
    deleting the file afterward.

    The previous value of GIT_INDEX_FILE (or its absence) is restored on
    exit."""
    index_path = create_temporary_index(tree)
    old_index_path = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if old_index_path is None:
            del os.environ['GIT_INDEX_FILE']
        else:
            os.environ['GIT_INDEX_FILE'] = old_index_path
        os.remove(index_path)


def create_temporary_index(tree=None):
    """Create a temporary index file and return the created file's path.

    If `tree` is not None, use that as the tree to read in.  Otherwise, an
    empty index is created."""
    gitdir = run('git', 'rev-parse', '--git-dir')
    path = os.path.join(gitdir, temp_index_basename)
    if tree is None:
        tree = '--empty'
    run('git', 'read-tree', '--index-output=' + path, tree)
    return path


def print_diff(old_tree, new_tree):
    """Print the diff between the two trees to stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree,
                           new_tree, '--'])


def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails if there are local changes in those files and not `force`.  If
    `patch_mode`, runs `git checkout --patch` to select hunks interactively.

    Returns the list of files that differ between the two trees."""
    diff_output = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                      '--name-only', old_tree, new_tree)
    # Drop empty names so that empty output gives [] rather than [''].
    changed_files = [name for name in diff_output.split('\0') if name]
    if not force and changed_files:
        # '--' keeps file names from being parsed as options.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.',
                  file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree".  Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files


def run(*args, **kwargs):
    """Run the command `args` and return its standard output as a string.

    Keyword arguments:
      stdin   -- data to send to the command's standard input (default '').
      verbose -- if true (default), report stderr output and failures on
                 stderr.
      strip   -- if true (default), strip trailing newlines from the output.

    Exits with status 2 if the command returns a non-zero exit status.
    Raises TypeError for any other keyword argument."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # The pipes are binary, so text input must be encoded first.
    stdout, stderr = p.communicate(input=to_bytes(stdin))

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)


def die(message):
    """Print `message` as an error on stderr and exit with status 2."""
    print('error:', message, file=sys.stderr)
    sys.exit(2)


def to_bytes(str_input):
    """Return `str_input` as bytes, encoding text as UTF-8."""
    # Encode to UTF-8 to get binary data.
    if isinstance(str_input, bytes):
        return str_input
    return str_input.encode('utf-8')


def to_string(bytes_input):
    """Return `bytes_input` as a native `str`, encoding as UTF-8 if it is
    not one already."""
    if isinstance(bytes_input, str):
        return bytes_input
    return bytes_input.encode('utf-8')


def convert_string(bytes_input):
    """Return `bytes_input` decoded from UTF-8 as a native `str`.

    Falls back to `str(bytes_input)` if the input has no `decode` method or
    cannot be decoded."""
    try:
        return to_string(bytes_input.decode('utf-8'))
    except AttributeError:  # 'str' object has no attribute 'decode'.
        return str(bytes_input)
    except UnicodeError:
        return str(bytes_input)


if __name__ == '__main__':
    main()