#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-n][-v][-h] [ path ... ]

-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
-h (--help)     Help.      Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files.  Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output.  In this case, the -d, -r, -n and -v flags are
ignored.

You can pass one or more file and/or directory paths.  When a path is a
directory, all .py files within the directory will be examined, and, if the
-r option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
first saving a copy of each original with a .bak extension (unless -n is
given).  If it finds nothing to change, the file is left alone.  If reindent
does change a file, the changed file is a fixed-point for future runs (i.e.,
running reindent on the resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines.  So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
"""
41
__version__ = "1"

import tokenize
import os
import shutil
import sys
import io

# Command-line option state.  main() updates these from the parsed options
# and check() reads them.
verbose = 0        # incremented per -v/--verbose; nonzero prints progress
recurse = 0        # incremented per -r/--recurse; nonzero descends into subdirs
dryrun = 0         # incremented per -d/--dryrun; nonzero means never write files
makebackup = True  # set to False by -n/--nobackup; true means copy to ".bak" first
53
def usage(msg=None):
    """Write an optional message, then the module docstring, to stderr.

    msg -- object describing the command-line problem.  When not None its
           str() form is written on its own line before the usage text.
    """
    # sys.stderr.write is used instead of the Python-2-only
    # "print >> stream" statement; each item still ends with one newline.
    if msg is not None:
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__,))
58
def errprint(*args):
    """Write the arguments to stderr on one line.

    Each argument is converted with str(); the pieces are separated by a
    single space and followed by a newline.  With no arguments only the
    newline is written.
    """
    sys.stderr.write(" ".join(str(arg) for arg in args) + "\n")
65
def main():
    """Command-line entry point.

    Parses sys.argv[1:] with getopt and updates the module-level option
    globals accordingly.  An unrecognised option prints the error plus the
    usage text, and -h/--help prints the usage text; in both cases nothing
    is processed.  With no path arguments the source on standard input is
    reindented onto standard output; otherwise check() is called on each
    path in order.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh",
                        ["dryrun", "recurse", "nobackup", "verbose", "help"])
    except getopt.error as msg:
        usage(msg)
        return
    # None of the options takes a value, so the second item is ignored.
    for o, _ in opts:
        if o in ('-d', '--dryrun'):
            dryrun += 1
        elif o in ('-r', '--recurse'):
            recurse += 1
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose += 1
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin -> stdout, written whether or not run()
        # reports a change.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)
94
def check(file):
    """Reindent one file, or every eligible file under one directory.

    file -- path to examine.  For a directory that is not a symlink, each
            entry whose name ends in ".py" (ignoring case) is checked, and,
            when the global recurse is set, so is each non-symlink
            subdirectory whose name does not start with ".".

    Returns True if the file needed changes (it is rewritten in place
    unless the global dryrun is set), False if it needed none, and None for
    a directory, a file that cannot be opened, or a file with mixed line
    endings.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory %s" % (file,))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        # No newline: the verdict ("changed." / "unchanged.") is printed
        # later on the same line.
        sys.stdout.write("checking %s ... " % (file,))
    try:
        f = open(file, "rb")
    except IOError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    # The with-block closes the handle even if reading the file raises.
    with f:
        r = Reindenter(f)

    newline = r.newlines
    if isinstance(newline, tuple):
        # A tuple means more than one kind of line ending was seen.
        errprint("%s: mixed newlines detected; cannot process file" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up %s to %s" % (file, bak))
            with open(file, "wb") as f:
                r.write(f)
            if verbose:
                print("wrote new %s" % (file,))
        return True
    else:
        if verbose:
            print("unchanged.")
        return False
146
147def _detect_newlines(lines):
148    newlines = {'\r\n' if line[-2:] == '\r\n' else
149                '\n' if line[-1:] == '\n' else
150                '\r' if line[-1:] == '\r' else
151                ''
152                for line in lines}
153    newlines.discard('')
154    newlines = tuple(sorted(newlines))
155    if not newlines:
156        return '\n'
157    if len(newlines) == 1:
158        return newlines[0]
159    return newlines
160
161def _rstrip(line, JUNK='\r\n \t'):
162    """Return line stripped of trailing spaces, tabs, newlines.
163
164    Note that line.rstrip() instead also strips sundry control characters,
165    but at least one known Emacs user expects to keep junk like that, not
166    mentioning Barry by name or anything <wink>.
167    """
168
169    i = len(line)
170    while i > 0 and line[i-1] in JUNK:
171        i -= 1
172    return line[:i]
173
class Reindenter:
    """Reindent one Python source to 4 spaces per indentation level.

    Usage: construct with a file-like object, call run() to compute the
    transformed lines, then write() to emit them.
    """

    def __init__(self, f):
        """Read all lines from f and prepare them for tokenizing.

        f -- object whose readlines() returns the source lines.
        """
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # Save the newlines found in the file so they can be used to
        #  create output without mutating the newlines.
        self.newlines = _detect_newlines(self.raw)
        if isinstance(self.newlines, tuple):
            # Mixed endings: use the first (in sorted order) for output.
            self.newline = self.newlines[0]
        else:
            self.newline = self.newlines

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's newline.
        self.lines = [_rstrip(line).expandtabs() + self.newline
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Compute the reindented program and store it in self.after.

        Returns true if the result differs from the raw input lines.
        """
        # generate_tokens() yields the same 5-item tokens that the
        # callback form tokenize.tokenize(readline, tokeneater) used to
        # pass along, and is available on both Python 2 and Python 3.
        for token_info in tokenize.generate_tokens(self.getline):
            self.tokeneater(*token_info)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == self.newline:
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                # Shift every line of this statement (through the line
                # before the next recorded one) by the same amount.
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == self.newline:
                            # Keep blank lines free of trailing spaces.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never remove more than the line's own blanks.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the lines computed by run() to the file-like object f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next prepared line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        The positional arguments are the five items of a tokenize token:
        token type, token string, (row, col) start, (row, col) end and the
        source line.  Appends (row, level) to self.stats for the first
        token of each statement and (row, -1) for each comment seen while
        a new statement is still being looked for.
        """
        # Unpacked here rather than in the signature: tuple parameters
        # are Python-2-only syntax.
        sline, scol = slinecol

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))
324
# Count number of leading blanks.
def getlspace(line):
    """Return how many space characters line starts with.

    Only ' ' counts; a tab or any other character ends the run.
    """
    return len(line) - len(line.lstrip(" "))
331
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
334