• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===------------------------------------------------------------------------===#
10
11r"""
12clang-format git integration
13============================
14
15This file provides a clang-format integration for git. Put it somewhere in your
16path and ensure that it is executable. Then, "git clang-format" will invoke
17clang-format on the changes in current files or a specific commit.
18
19For further details, run:
20git clang-format -h
21
22Requires Python 2.7 or Python 3
23"""
24
25from __future__ import absolute_import, division, print_function
26import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
# Usage line handed to argparse as the parser's `usage`.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description handed to argparse as the parser's `description`.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD.  Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
  clangFormat.binary
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
'''

# Name of the temporary index file in which the output of clang-format is
# saved.  This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: `start` is the first line number and `count` the
# number of lines in the run.
Range = collections.namedtuple('Range', 'start, count')
58
59
def main():
  """Parse the command line and run clang-format on the changed lines.

  Defaults for --binary, --commit, --extensions and --style come from the git
  configuration.  With zero or one commit, the working directory is compared
  against that commit and the formatted result is applied to the working
  directory (or printed with --diff).  With two commits, --diff is required
  and the second commit is formatted relative to the first.

  Exits with status 2 (via die()) when more than two commits are given, or
  when two commits are given without --diff."""
  config = load_git_config()

  # In order to keep '--' yet allow options after positionals, we need to
  # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
  # nargs=argparse.REMAINDER disallows options after positionals.)
  argv = sys.argv[1:]
  try:
    idx = argv.index('--')
  except ValueError:
    dash_dash = []
  else:
    dash_dash = argv[idx:]
    argv = argv[:idx]

  default_extensions = ','.join([
      # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
      'c', 'h',  # C
      'm',  # ObjC
      'mm',  # ObjC++
      'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
      'cu', 'cuh',  # CUDA
      # Other languages that clang-format supports
      'proto', 'protodevel',  # Protocol Buffers
      'java',  # Java
      'js',  # JavaScript
      'ts',  # TypeScript
      'cs',  # C Sharp
      ])

  p = argparse.ArgumentParser(
    usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
    description=desc)
  p.add_argument('--binary',
                 default=config.get('clangformat.binary', 'clang-format'),
                 help='path to clang-format')
  p.add_argument('--commit',
                 default=config.get('clangformat.commit', 'HEAD'),
                 help='default commit to use if none is specified')
  p.add_argument('--diff', action='store_true',
                 help='print a diff instead of applying the changes')
  p.add_argument('--extensions',
                 default=config.get('clangformat.extensions',
                                    default_extensions),
                 help=('comma-separated list of file extensions to format, '
                       'excluding the period and case-insensitive'))
  p.add_argument('-f', '--force', action='store_true',
                 help='allow changes to unstaged files')
  p.add_argument('-p', '--patch', action='store_true',
                 help='select hunks interactively')
  p.add_argument('-q', '--quiet', action='count', default=0,
                 help='print less information')
  p.add_argument('--style',
                 default=config.get('clangformat.style', None),
                 help='passed to clang-format')
  p.add_argument('-v', '--verbose', action='count', default=0,
                 help='print extra information')
  # We gather all the remaining positional arguments into 'args' since we need
  # to use some heuristics to determine whether or not <commit> was present.
  # However, to print pretty messages, we make use of metavar and help.
  p.add_argument('args', nargs='*', metavar='<commit>',
                 help='revision from which to compute the diff')
  p.add_argument('ignored', nargs='*', metavar='<file>...',
                 help='if specified, only consider differences in these files')
  opts = p.parse_args(argv)

  # Fold -q into -v so that a single signed verbosity level remains.
  opts.verbose -= opts.quiet
  del opts.quiet

  commits, files = interpret_args(opts.args, dash_dash, opts.commit)
  # Reject too many commits first; otherwise three or more commits would be
  # reported as a missing --diff or silently passed on to git.
  if len(commits) > 2:
    die('at most two commits allowed; %d given' % len(commits))
  if len(commits) == 2 and not opts.diff:
    die('--diff is required when two commits are given')
  changed_lines = compute_diff_and_extract_lines(commits, files)
  if opts.verbose >= 1:
    # Remember every changed file so the ones dropped by the extension filter
    # can be reported below.
    ignored_files = set(changed_lines)
  filter_by_extension(changed_lines, opts.extensions.lower().split(','))
  if opts.verbose >= 1:
    ignored_files.difference_update(changed_lines)
    if ignored_files:
      print('Ignoring changes in the following files (wrong extension):')
      for filename in ignored_files:
        print('    %s' % filename)
    if changed_lines:
      print('Running clang-format on the following files:')
      for filename in changed_lines:
        print('    %s' % filename)
  if not changed_lines:
    if opts.verbose >= 0:
      print('no modified files to format')
    return
  # The computed diff outputs absolute paths, so we must cd before accessing
  # those files.
  cd_to_toplevel()
  if len(commits) > 1:
    # Two commits: format the file contents stored in the second commit.
    old_tree = commits[1]
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 revision=commits[1],
                                                 binary=opts.binary,
                                                 style=opts.style)
  else:
    # Zero or one commit: format the files in the working directory.
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
  if opts.verbose >= 1:
    print('old tree: %s' % old_tree)
    print('new tree: %s' % new_tree)
  if old_tree == new_tree:
    if opts.verbose >= 0:
      print('clang-format did not modify any files')
  elif opts.diff:
    print_diff(old_tree, new_tree)
  else:
    changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                  patch_mode=opts.patch)
    # In patch mode the list is only printed when extra verbosity was
    # requested.
    if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
      print('changed files:')
      for filename in changed_files:
        print('    %s' % filename)
183
184
def load_git_config(non_string_options=None):
  """Read every git configuration setting into a dictionary.

  Values are kept as strings, except for the names listed in
  `non_string_options`, a dictionary mapping an option name (in lower case) to
  either "--bool" or "--int"; those are queried again through `git config`
  with that type flag."""
  typed_options = {} if non_string_options is None else non_string_options
  settings = {}
  listing = run('git', 'config', '--list', '--null')
  for record in listing.split('\0'):
    if not record:
      continue
    # With --null, name and value are separated by a newline.  A record
    # without one is a setting with no '=', which is implicitly 'true'.
    name, newline, value = record.partition('\n')
    if not newline:
      value = 'true'
    if name in typed_options:
      value = run('git', 'config', typed_options[name], name)
    settings[name] = value
  return settings
206
207
def interpret_args(args, dash_dash, default_commit):
  """Split `args` of the form "[commits] [--] [files]" into (commits, files).

  The caller must already have moved "--" and everything after it out of
  `args` and into `dash_dash`.

  When `dash_dash` is non-empty, everything in `args` is taken to be a commit
  and is validated as such.  Otherwise the arguments are examined from left to
  right, and leading revisions are consumed from `args` until the first
  non-revision.  When no commit is found, a list holding `default_commit` is
  used."""
  if not dash_dash and not args:
    return [default_commit], []

  if dash_dash:
    commits = args if len(args) != 0 else [default_commit]
    for candidate in commits:
      kind = get_object_type(candidate)
      if kind in ('commit', 'tag'):
        continue
      if kind is None:
        die("'%s' is not a commit" % candidate)
      else:
        die("'%s' is a %s, but a commit was expected" % (candidate, kind))
    # dash_dash[0] is the "--" itself.
    return commits, dash_dash[1:]

  commits = []
  while args and disambiguate_revision(args[0]):
    commits.append(args.pop(0))
  if not commits:
    commits = [default_commit]
  return commits, args
244
245
def disambiguate_revision(value):
  """Tell whether `value` names a revision (True) or a file (False); dies
  when it names some other kind of git object."""
  # When `value` is ambiguous (neither a commit nor a file), this command
  # dies with an appropriate error message.
  run('git', 'rev-parse', value, verbose=False)
  kind = get_object_type(value)
  if kind in ('commit', 'tag'):
    return True
  if kind is None:
    return False
  die('`%s` is a %s, but a commit or filename was expected' %
      (value, kind))
258
259
def get_object_type(value):
  """Return the type of git object `value` as a string, or None when
  `git cat-file -t` rejects it."""
  probe = subprocess.Popen(['git', 'cat-file', '-t', value],
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  # stderr is captured and discarded so that git's complaint is not shown.
  type_name, _ = probe.communicate()
  if probe.returncode == 0:
    return convert_string(type_name.strip())
  return None
269
270
def compute_diff_and_extract_lines(commits, files):
  """Run compute_diff() and feed its output through extract_lines()."""
  differ = compute_diff(commits, files)
  line_ranges = extract_lines(differ.stdout)
  differ.stdout.close()
  if differ.wait() != 0:
    # Assume error was already printed to stderr.
    sys.exit(2)
  return line_ranges
281
282
def compute_diff(commits, files):
  """Start the git process that produces the diff for `commits`.

  The returned subprocess object's `stdout` yields a patch: the differences
  between the working directory and the commit when a single commit is given,
  or between the two commits when more than one is given, restricted to
  `files` when that list is non-empty.  The patch has zero context lines."""
  tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
  argv = ['git', tool, '-p', '-U0']
  argv += commits
  argv.append('--')
  argv += files
  differ = subprocess.Popen(argv, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
  differ.stdin.close()
  return differ
298
299
def extract_lines(patch_file):
  """Collect the changed line ranges described by `patch_file`.

  `patch_file` is an iterable of patch lines produced with ``-U0``, meaning
  unidiff format with zero lines of context.

  Returns a dict mapping each filename to a list of `Range`s (start line and
  line count).  Hunks that add no lines are left out."""
  file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
  hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
  ranges_by_file = {}
  for raw_line in patch_file:
    text = convert_string(raw_line)
    header = file_header.search(text)
    if header:
      filename = header.group(1).rstrip('\r\n')
    hunk = hunk_header.search(text)
    if not hunk:
      continue
    first_line = int(hunk.group(1))
    # A hunk header without an explicit count covers exactly one line.
    length = int(hunk.group(3)) if hunk.group(3) else 1
    if length > 0:
      ranges_by_file.setdefault(filename, []).append(Range(first_line, length))
  return ranges_by_file
324
325
def filter_by_extension(dictionary, allowed_extensions):
  """Remove, in place, every key of `dictionary` whose extension is not
  allowed.

  `allowed_extensions` must be a collection of lowercase file extensions,
  excluding the period.  Keys without any period are kept only when the empty
  string is one of the allowed extensions."""
  allowed = frozenset(allowed_extensions)
  keep_extensionless = '' in allowed
  rejected = []
  for path in dictionary:
    _, dot, extension = path.rpartition('.')
    if not dot:
      if not keep_extensionless:
        rejected.append(path)
    elif extension.lower() not in allowed:
      rejected.append(path)
  # Delete after the scan so the dictionary is not resized while iterating.
  for path in rejected:
    del dictionary[path]
338
339
def cd_to_toplevel():
  """Make the top level of the git repository the current directory."""
  os.chdir(run('git', 'rev-parse', '--show-toplevel'))
344
345
def create_tree_from_workdir(filenames):
  """Build a git tree holding the working-directory contents of `filenames`.

  Returns the object ID (SHA-1) of the created tree."""
  tree_id = create_tree(filenames, mode='--stdin')
  return tree_id
351
352
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
  """Format every file in `changed_lines` and store the results in a git tree.

  `changed_lines` maps each filename to its changed line ranges.  When
  `revision` is given, file modes are read from that revision; otherwise they
  are read from the working directory.

  Returns the object ID (SHA-1) of the created tree."""
  def file_mode(filename):
    """Return the octal mode string for `filename`."""
    if revision:
      ls_tree_cmd = ['git', 'ls-tree',
                     '%s:%s' % (revision, os.path.dirname(filename)),
                     os.path.basename(filename)]
      ls_tree = subprocess.Popen(ls_tree_cmd, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
      listing = ls_tree.communicate()[0]
      # The mode is the first whitespace-separated field of the listing.
      mode = oct(int(listing.split()[0], 8))
    else:
      mode = oct(os.stat(filename).st_mode)
    # Adjust python3 octal format so that it matches what git expects
    if mode.startswith('0o'):
      mode = '0' + mode[2:]
    return mode

  def index_entries():
    """Yield one "<mode> <blob>\\t<filename>" entry per formatted file."""
    try:
      pairs = changed_lines.iteritems()  # Python 2
    except AttributeError:
      pairs = changed_lines.items()  # Python 3
    for filename, line_ranges in pairs:
      mode = file_mode(filename)
      blob_id = clang_format_to_blob(filename, line_ranges,
                                     revision=revision,
                                     binary=binary,
                                     style=style)
      yield '%s %s\t%s' % (mode, blob_id, filename)

  return create_tree(index_entries(), '--index-info')
384
385
def create_tree(input_lines, mode):
  """Build a tree object from `input_lines` using a temporary index.

  With mode '--stdin', `input_lines` must be an iterable of filenames.  With
  mode '--index-info', it must be an iterable of entries suitable for "git
  update-index --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".
  Any other mode is invalid.

  Returns the output of "git write-tree" for the populated index."""
  assert mode in ('--stdin', '--index-info')
  update_index_cmd = ['git', 'update-index', '--add', '-z', mode]
  with temporary_index_file():
    updater = subprocess.Popen(update_index_cmd, stdin=subprocess.PIPE)
    for entry in input_lines:
      # -z makes git expect NUL-terminated records.
      updater.stdin.write(to_bytes('%s\0' % entry))
    updater.stdin.close()
    exit_status = updater.wait()
    if exit_status != 0:
      die('`%s` failed' % ' '.join(update_index_cmd))
    return run('git', 'write-tree')
404
405
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
  """Run clang-format on the given file and save the result to a git blob.

  Runs on the file in `revision` if not None, or on the file in the working
  directory if `revision` is None.

  `line_ranges` is an iterable of (start_line, line_count) pairs; each one is
  passed to clang-format as an inclusive `-lines=<first>:<last>` range.
  `binary` is the clang-format executable to run, and `style`, when truthy, is
  passed through as `-style=<style>`.

  Dies if the executable cannot be found, or if the git-cat-file, clang-format
  or git-hash-object process exits with a non-zero status.

  Returns the object ID (SHA-1) of the created blob."""
  clang_format_cmd = [binary]
  if style:
    clang_format_cmd.extend(['-style='+style])
  # Convert each (start, count) pair into an inclusive first:last range.
  clang_format_cmd.extend([
      '-lines=%s:%s' % (start_line, start_line+line_count-1)
      for start_line, line_count in line_ranges])
  if revision:
    # The contents come from git through a pipe, so clang-format is told the
    # file name separately.
    clang_format_cmd.extend(['-assume-filename='+filename])
    git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
    git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    git_show.stdin.close()
    clang_format_stdin = git_show.stdout
  else:
    # clang-format reads the working-directory file itself; its stdin is a
    # pipe that is closed below without anything being written to it.
    clang_format_cmd.extend([filename])
    git_show = None
    clang_format_stdin = subprocess.PIPE
  try:
    clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
                                    stdout=subprocess.PIPE)
    if clang_format_stdin == subprocess.PIPE:
      clang_format_stdin = clang_format.stdin
  except OSError as e:
    if e.errno == errno.ENOENT:
      die('cannot find executable "%s"' % binary)
    else:
      raise
  # Close this process's handle on clang-format's input (either git_show's
  # stdout or clang-format's own stdin pipe).
  clang_format_stdin.close()
  # Pipe the formatted output straight into a new blob object.
  hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
  hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
                                 stdout=subprocess.PIPE)
  # hash-object now holds the read end; drop this process's copy.
  clang_format.stdout.close()
  stdout = hash_object.communicate()[0]
  # Check the processes from the end of the pipeline back to its start.
  if hash_object.returncode != 0:
    die('`%s` failed' % ' '.join(hash_object_cmd))
  if clang_format.wait() != 0:
    die('`%s` failed' % ' '.join(clang_format_cmd))
  if git_show and git_show.wait() != 0:
    die('`%s` failed' % ' '.join(git_show_cmd))
  return convert_string(stdout).rstrip('\r\n')
454
455
@contextlib.contextmanager
def temporary_index_file(tree=None):
  """Context manager that points GIT_INDEX_FILE at a freshly created temporary
  index for the duration of the block, then restores the previous setting and
  deletes the temporary file."""
  temp_index = create_temporary_index(tree)
  saved_index = os.environ.get('GIT_INDEX_FILE')
  os.environ['GIT_INDEX_FILE'] = temp_index
  try:
    yield
  finally:
    if saved_index is not None:
      os.environ['GIT_INDEX_FILE'] = saved_index
    else:
      del os.environ['GIT_INDEX_FILE']
    os.remove(temp_index)
471
472
def create_temporary_index(tree=None):
  """Write a temporary index file inside the git directory and return its path.

  The index is populated from `tree` when one is given; otherwise it is
  created empty."""
  git_dir = run('git', 'rev-parse', '--git-dir')
  index_path = os.path.join(git_dir, temp_index_basename)
  source = '--empty' if tree is None else tree
  run('git', 'read-tree', '--index-output=' + index_path, source)
  return index_path
484
485
def print_diff(old_tree, new_tree):
  """Show the differences between `old_tree` and `new_tree` on stdout."""
  # The porcelain 'diff' is used rather than the plumbing 'diff-tree' because
  # the output is meant to be read by the user, and only the former does nice
  # things like color and pagination.
  #
  # Only modified files are printed: `new_tree` contains just the files that
  # were modified, so without the filter every unmodified file would show up
  # as deleted.
  diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
  subprocess.check_call(diff_cmd)
497
498
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
  """Check the contents of `new_tree` out into the working directory.

  Unless `force` is set, exits with status 2 when any file that would be
  modified has unstaged changes.  With `patch_mode`, `git checkout --patch` is
  run so that hunks are selected interactively.

  Returns the list of files that differ between `old_tree` and `new_tree`."""
  name_listing = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                     '--name-only', old_tree, new_tree)
  changed_files = name_listing.rstrip('\0').split('\0')
  if not force:
    unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)
    if unstaged_files:
      messages = ('The following files would be modified but '
                  'have unstaged changes:',
                  unstaged_files,
                  'Please commit, stage, or stash them first.')
      for message in messages:
        print(message, file=sys.stderr)
      sys.exit(2)
  # In patch mode, we could just as well create an index from the new tree
  # and checkout from that, but then the user will be presented with a
  # message saying "Discard ... from worktree".  Instead, we use the old
  # tree as the index and checkout from new_tree, which gives the slightly
  # better message, "Apply ... to index and worktree".  This is not quite
  # right, since it won't be applied to the user's index, but oh well.
  if patch_mode:
    with temporary_index_file(old_tree):
      subprocess.check_call(['git', 'checkout', '--patch', new_tree])
  else:
    with temporary_index_file(new_tree):
      run('git', 'checkout-index', '-a', '-f')
  return changed_files
529
530
def run(*args, **kwargs):
  """Run the command `args` and return its decoded standard output.

  Keyword arguments:
    stdin:   data fed to the command's standard input (default '').
    verbose: when true (the default), prefix any stderr report with a line
             naming the command.
    strip:   when true (the default), strip trailing newlines from the output.

  Anything the command writes to stderr is echoed to stderr.  Exits with
  status 2 when the command returns a non-zero status.  Raises TypeError for
  any other keyword argument."""
  stdin = kwargs.pop('stdin', '')
  verbose = kwargs.pop('verbose', True)
  strip = kwargs.pop('strip', True)
  if kwargs:
    raise TypeError("run() got an unexpected keyword argument '%s'" %
                    next(iter(kwargs)))
  child = subprocess.Popen(args, stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE, stdin=subprocess.PIPE)
  out, err = child.communicate(input=stdin)

  out = convert_string(out)
  err = convert_string(err)

  if child.returncode != 0:
    if verbose:
      print('`%s` returned %s' % (' '.join(args), child.returncode),
            file=sys.stderr)
    if err:
      print(err.rstrip(), file=sys.stderr)
    sys.exit(2)

  if err:
    if verbose:
      print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr)
    print(err.rstrip(), file=sys.stderr)
  return out.rstrip('\r\n') if strip else out
557
558
def die(message):
  """Report `message` on stderr, prefixed with "error:", and exit with
  status 2."""
  print('error:', message, file=sys.stderr)
  raise SystemExit(2)
562
563
def to_bytes(str_input):
  """Return `str_input` as binary data, encoding it as UTF-8 unless it is
  already a `bytes` object."""
  already_binary = isinstance(str_input, bytes)
  return str_input if already_binary else str_input.encode('utf-8')
569
570
def to_string(bytes_input):
  """Return `bytes_input` as the interpreter's native `str` type.

  A native `str` is returned unchanged.  On Python 3, `bytes` are decoded as
  UTF-8 (previously this raised AttributeError because `bytes` has no
  `encode` method).  Any other input (in practice a Python 2 `unicode`
  object) is encoded as UTF-8.

  Raises UnicodeError when the input cannot be decoded or encoded."""
  if isinstance(bytes_input, str):
    return bytes_input
  if isinstance(bytes_input, bytes):
    # Only reachable on Python 3: on Python 2 `bytes` is `str`, which was
    # already handled above.
    return bytes_input.decode('utf-8')
  return bytes_input.encode('utf-8')
575
576
def convert_string(bytes_input):
  """Decode `bytes_input` from UTF-8 into a native string.

  Falls back to `str(bytes_input)` when the input has no `decode` method
  (it is already a string) or is not valid UTF-8."""
  try:
    decoded = bytes_input.decode('utf-8')
    return to_string(decoded)
  except (AttributeError, UnicodeError):
    # AttributeError: 'str' object has no attribute 'decode'.
    return str(bytes_input)
584
# Run the tool only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
587