• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/env python
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
5"""reindent [-d][-r][-v] [ path ... ]
6
7-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
8-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
9-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10-v (--verbose)  Verbose.   Print informative msgs; else no output.
11-h (--help)     Help.      Print this usage information and exit.
12
13Change Python (.py) files to use 4-space indents and no hard tab characters.
14Also trim excess spaces and tabs from ends of lines, and remove empty lines
15at the end of files.  Also ensure the last line ends with a newline.
16
17If no paths are given on the command line, reindent operates as a filter,
18reading a single source file from standard input and writing the transformed
19source to standard output.  In this case, the -d, -r and -v flags are
20ignored.
21
22You can pass one or more file and/or directory paths.  When a directory
23path, all .py files within the directory will be examined, and, if the -r
24option is given, likewise recursively for subdirectories.
25
26If output is not to standard output, reindent overwrites files in place,
27renaming the originals with a .bak extension.  If it finds nothing to
28change, the file is left alone.  If reindent does change a file, the changed
29file is a fixed-point for future runs (i.e., running reindent on the
30resulting .py file won't change it again).
31
32The hard part of reindenting is figuring out what to do with comment
33lines.  So long as the input files get a clean bill of health from
34tabnanny.py, reindent should do a good job.
35
36The backup file is a copy of the one that is being reindented. The ".bak"
37file is generated with shutil.copy(), but some corner cases regarding
38user/group and permissions could leave the backup file more readable than
39you'd prefer. You can always use the --nobackup option to prevent this.
40"""
41
42__version__ = "1"
43
44import tokenize
45import os, shutil
46import sys
47
# Command-line option state; main() updates these from the parsed flags.
dryrun = recurse = verbose = 0   # each is bumped once per -d / -r / -v seen
makebackup = True                # set to False by -n / --nobackup
52
def usage(msg=None):
    """Write an optional message, then the module usage text, to stderr.

    msg -- text (or exception) to show before the usage text; nothing
           extra is written when it is None.
    """
    # Use sys.stderr.write, as errprint() does, instead of the
    # Python-2-only "print >>" statement so this works on any version.
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
57
def errprint(*args):
    """Write the str() of every argument to stderr on one line.

    Arguments are separated by single spaces and the line is terminated
    with a newline; with no arguments only the newline is written.
    """
    text = " ".join([str(item) for item in args])
    sys.stderr.write(text + "\n")
64
def main():
    """Parse the command line and reindent stdin or each given path.

    Recognised options update the module-level verbose, recurse, dryrun
    and makebackup settings.  With no path arguments the source is read
    from standard input and the result is written to standard output;
    otherwise every argument is passed to check().  An option-parsing
    error, or -h/--help, prints the usage text and returns without
    processing anything.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh",
                        ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error as msg:
        # "except X as name" replaces the old comma form, which newer
        # interpreters reject as a syntax error.
        usage(msg)
        return
    # None of the options takes a value, so only the flag itself matters.
    for o, _ in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin in, transformed source out.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
93
def check(file):
    """Reindent a single file, or the eligible files found in a directory.

    For a directory (that is not a symlink) every entry whose name ends in
    ".py" (case-insensitively) is checked; when the global recurse flag is
    set, non-symlink subdirectories whose names do not start with "." are
    descended into as well.

    For a file, the source is read and reindented in memory.  If it
    changed and this is not a dry run, the original is copied to
    file + ".bak" (unless backups are disabled) and the file is then
    overwritten in place.  Progress messages go to stdout when the global
    verbose flag is set.

    Returns True if the file needed changes, False if it did not, and None
    for a directory or for a file that could not be opened.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            sys.stdout.write("listing directory %s\n" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline: the verdict ("changed." / "unchanged.") follows on
        # the same line.
        sys.stdout.write("checking %s ... " % (file,))
    try:
        f = open(file)
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # The constructor reads the whole file; make sure the handle is closed
    # even if that read fails.
    try:
        r = Reindenter(f)
    finally:
        f.close()

    if not r.run():
        if verbose:
            sys.stdout.write("unchanged.\n")
        return False

    if verbose:
        sys.stdout.write("changed.\n")
        if dryrun:
            sys.stdout.write("But this is a dry run, so leaving it alone.\n")
    if not dryrun:
        bak = file + ".bak"
        if makebackup:
            shutil.copyfile(file, bak)
            if verbose:
                sys.stdout.write("backed up %s to %s\n" % (file, bak))
        f = open(file, "w")
        try:
            r.write(f)
        finally:
            # Close the output handle even when the write fails.
            f.close()
        if verbose:
            sys.stdout.write("wrote new %s\n" % (file,))
    return True
139
140def _rstrip(line, JUNK='\n \t'):
141    """Return line stripped of trailing spaces, tabs, newlines.
142
143    Note that line.rstrip() instead also strips sundry control characters,
144    but at least one known Emacs user expects to keep junk like that, not
145    mentioning Barry by name or anything <wink>.
146    """
147
148    i = len(line)
149    while i > 0 and line[i-1] in JUNK:
150        i -= 1
151    return line[:i]
152
class Reindenter:
    """Re-indent one Python source so each block level is four spaces.

    Construct it with an open file-like object (all lines are read
    immediately via readlines()), call run() to compute the result, then
    write() to emit the transformed lines.
    """

    def __init__(self, f):
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented lines into self.after.

        Returns True when the result differs from the raw input lines.
        """
        # Drive the tokenizer with an explicit loop; generate_tokens is
        # available on both old and new interpreters, unlike the
        # callback form of tokenize.tokenize().
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                # Shift the statement and all of its continuation lines
                # by the same amount.
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Keep blank lines blank.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never remove more than the line's own
                        # leading spaces.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the lines computed by run() to the file-like object f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Record the line number and indent level of each stmt/comment.

        Called once per token with the five tokenize fields.  slinecol
        is the (row, column) start position; only the row is used.
        """
        # Unpacked here rather than in the signature: tuple parameters
        # are not accepted by newer interpreters.
        sline = slinecol[0]

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
295
# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters line starts with.

    Only " " counts; a tab or any other character ends the run.
    """
    return len(line) - len(line.lstrip(" "))
302
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
305