• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/env python3
2
3"""The Tab Nanny despises ambiguous indentation.  She knows no mercy.
4
5tabnanny -- Detection of ambiguous indentation
6
7For the time being this module is intended to be called as a script.
8However it is possible to import it into an IDE and use the function
9check() described below.
10
11Warning: The API provided by this module is likely to change in future
12releases; such changes may not be backward compatible.
13"""
14
15# Released to the public domain, by Tim Peters, 15 April 1998.
16
17# XXX Note: this is now a standard library module.
18# XXX The API needs to undergo changes however; the current code is too
19# XXX script-like.  This will be addressed later.
20
21__version__ = "6"
22
23import os
24import sys
25import getopt
26import tokenize
27if not hasattr(tokenize, 'NL'):
28    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
29
30__all__ = ["check", "NannyNag", "process_tokens"]
31
32verbose = 0
33filename_only = 0
34
35def errprint(*args):
36    sep = ""
37    for arg in args:
38        sys.stderr.write(sep + str(arg))
39        sep = " "
40    sys.stderr.write("\n")
41
42def main():
43    global verbose, filename_only
44    try:
45        opts, args = getopt.getopt(sys.argv[1:], "qv")
46    except getopt.error as msg:
47        errprint(msg)
48        return
49    for o, a in opts:
50        if o == '-q':
51            filename_only = filename_only + 1
52        if o == '-v':
53            verbose = verbose + 1
54    if not args:
55        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
56        return
57    for arg in args:
58        check(arg)
59
60class NannyNag(Exception):
61    """
62    Raised by tokeneater() if detecting an ambiguous indent.
63    Captured and handled in check().
64    """
65    def __init__(self, lineno, msg, line):
66        self.lineno, self.msg, self.line = lineno, msg, line
67    def get_lineno(self):
68        return self.lineno
69    def get_msg(self):
70        return self.msg
71    def get_line(self):
72        return self.line
73
74def check(file):
75    """check(file_or_dir)
76
77    If file_or_dir is a directory and not a symbolic link, then recursively
78    descend the directory tree named by file_or_dir, checking all .py files
79    along the way. If file_or_dir is an ordinary Python source file, it is
80    checked for whitespace related problems. The diagnostic messages are
81    written to standard output using the print statement.
82    """
83
84    if os.path.isdir(file) and not os.path.islink(file):
85        if verbose:
86            print("%r: listing directory" % (file,))
87        names = os.listdir(file)
88        for name in names:
89            fullname = os.path.join(file, name)
90            if (os.path.isdir(fullname) and
91                not os.path.islink(fullname) or
92                os.path.normcase(name[-3:]) == ".py"):
93                check(fullname)
94        return
95
96    try:
97        f = tokenize.open(file)
98    except OSError as msg:
99        errprint("%r: I/O Error: %s" % (file, msg))
100        return
101
102    if verbose > 1:
103        print("checking %r ..." % file)
104
105    try:
106        process_tokens(tokenize.generate_tokens(f.readline))
107
108    except tokenize.TokenError as msg:
109        errprint("%r: Token Error: %s" % (file, msg))
110        return
111
112    except IndentationError as msg:
113        errprint("%r: Indentation Error: %s" % (file, msg))
114        return
115
116    except NannyNag as nag:
117        badline = nag.get_lineno()
118        line = nag.get_line()
119        if verbose:
120            print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
121            print("offending line: %r" % (line,))
122            print(nag.get_msg())
123        else:
124            if ' ' in file: file = '"' + file + '"'
125            if filename_only: print(file)
126            else: print(file, badline, repr(line))
127        return
128
129    finally:
130        f.close()
131
132    if verbose:
133        print("%r: Clean bill of health." % (file,))
134
135class Whitespace:
136    # the characters used for space and tab
137    S, T = ' \t'
138
139    # members:
140    #   raw
141    #       the original string
142    #   n
143    #       the number of leading whitespace characters in raw
144    #   nt
145    #       the number of tabs in raw[:n]
146    #   norm
147    #       the normal form as a pair (count, trailing), where:
148    #       count
149    #           a tuple such that raw[:n] contains count[i]
150    #           instances of S * i + T
151    #       trailing
152    #           the number of trailing spaces in raw[:n]
153    #       It's A Theorem that m.indent_level(t) ==
154    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
155    #   is_simple
156    #       true iff raw[:n] is of the form (T*)(S*)
157
158    def __init__(self, ws):
159        self.raw  = ws
160        S, T = Whitespace.S, Whitespace.T
161        count = []
162        b = n = nt = 0
163        for ch in self.raw:
164            if ch == S:
165                n = n + 1
166                b = b + 1
167            elif ch == T:
168                n = n + 1
169                nt = nt + 1
170                if b >= len(count):
171                    count = count + [0] * (b - len(count) + 1)
172                count[b] = count[b] + 1
173                b = 0
174            else:
175                break
176        self.n    = n
177        self.nt   = nt
178        self.norm = tuple(count), b
179        self.is_simple = len(count) <= 1
180
181    # return length of longest contiguous run of spaces (whether or not
182    # preceding a tab)
183    def longest_run_of_spaces(self):
184        count, trailing = self.norm
185        return max(len(count)-1, trailing)
186
187    def indent_level(self, tabsize):
188        # count, il = self.norm
189        # for i in range(len(count)):
190        #    if count[i]:
191        #        il = il + (i//tabsize + 1)*tabsize * count[i]
192        # return il
193
194        # quicker:
195        # il = trailing + sum (i//ts + 1)*ts*count[i] =
196        # trailing + ts * sum (i//ts + 1)*count[i] =
197        # trailing + ts * sum i//ts*count[i] + count[i] =
198        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
199        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
200        # and note that i//ts*count[i] is 0 when i < ts
201
202        count, trailing = self.norm
203        il = 0
204        for i in range(tabsize, len(count)):
205            il = il + i//tabsize * count[i]
206        return trailing + tabsize * (il + self.nt)
207
208    # return true iff self.indent_level(t) == other.indent_level(t)
209    # for all t >= 1
210    def equal(self, other):
211        return self.norm == other.norm
212
213    # return a list of tuples (ts, i1, i2) such that
214    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
215    # Intended to be used after not self.equal(other) is known, in which
216    # case it will return at least one witnessing tab size.
217    def not_equal_witness(self, other):
218        n = max(self.longest_run_of_spaces(),
219                other.longest_run_of_spaces()) + 1
220        a = []
221        for ts in range(1, n+1):
222            if self.indent_level(ts) != other.indent_level(ts):
223                a.append( (ts,
224                           self.indent_level(ts),
225                           other.indent_level(ts)) )
226        return a
227
228    # Return True iff self.indent_level(t) < other.indent_level(t)
229    # for all t >= 1.
230    # The algorithm is due to Vincent Broman.
231    # Easy to prove it's correct.
232    # XXXpost that.
233    # Trivial to prove n is sharp (consider T vs ST).
234    # Unknown whether there's a faster general way.  I suspected so at
235    # first, but no longer.
236    # For the special (but common!) case where M and N are both of the
237    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
238    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
239    # XXXwrite that up.
240    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
241    def less(self, other):
242        if self.n >= other.n:
243            return False
244        if self.is_simple and other.is_simple:
245            return self.nt <= other.nt
246        n = max(self.longest_run_of_spaces(),
247                other.longest_run_of_spaces()) + 1
248        # the self.n >= other.n test already did it for ts=1
249        for ts in range(2, n+1):
250            if self.indent_level(ts) >= other.indent_level(ts):
251                return False
252        return True
253
254    # return a list of tuples (ts, i1, i2) such that
255    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
256    # Intended to be used after not self.less(other) is known, in which
257    # case it will return at least one witnessing tab size.
258    def not_less_witness(self, other):
259        n = max(self.longest_run_of_spaces(),
260                other.longest_run_of_spaces()) + 1
261        a = []
262        for ts in range(1, n+1):
263            if self.indent_level(ts) >= other.indent_level(ts):
264                a.append( (ts,
265                           self.indent_level(ts),
266                           other.indent_level(ts)) )
267        return a
268
269def format_witnesses(w):
270    firsts = (str(tup[0]) for tup in w)
271    prefix = "at tab size"
272    if len(w) > 1:
273        prefix = prefix + "s"
274    return prefix + " " + ', '.join(firsts)
275
276def process_tokens(tokens):
277    INDENT = tokenize.INDENT
278    DEDENT = tokenize.DEDENT
279    NEWLINE = tokenize.NEWLINE
280    JUNK = tokenize.COMMENT, tokenize.NL
281    indents = [Whitespace("")]
282    check_equal = 0
283
284    for (type, token, start, end, line) in tokens:
285        if type == NEWLINE:
286            # a program statement, or ENDMARKER, will eventually follow,
287            # after some (possibly empty) run of tokens of the form
288            #     (NL | COMMENT)* (INDENT | DEDENT+)?
289            # If an INDENT appears, setting check_equal is wrong, and will
290            # be undone when we see the INDENT.
291            check_equal = 1
292
293        elif type == INDENT:
294            check_equal = 0
295            thisguy = Whitespace(token)
296            if not indents[-1].less(thisguy):
297                witness = indents[-1].not_less_witness(thisguy)
298                msg = "indent not greater e.g. " + format_witnesses(witness)
299                raise NannyNag(start[0], msg, line)
300            indents.append(thisguy)
301
302        elif type == DEDENT:
303            # there's nothing we need to check here!  what's important is
304            # that when the run of DEDENTs ends, the indentation of the
305            # program statement (or ENDMARKER) that triggered the run is
306            # equal to what's left at the top of the indents stack
307
308            # Ouch!  This assert triggers if the last line of the source
309            # is indented *and* lacks a newline -- then DEDENTs pop out
310            # of thin air.
311            # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
312            check_equal = 1
313
314            del indents[-1]
315
316        elif check_equal and type not in JUNK:
317            # this is the first "real token" following a NEWLINE, so it
318            # must be the first token of the next program statement, or an
319            # ENDMARKER; the "line" argument exposes the leading whitespace
320            # for this statement; in the case of ENDMARKER, line is an empty
321            # string, so will properly match the empty string with which the
322            # "indents" stack was seeded
323            check_equal = 0
324            thisguy = Whitespace(line)
325            if not indents[-1].equal(thisguy):
326                witness = indents[-1].not_equal_witness(thisguy)
327                msg = "indent not equal e.g. " + format_witnesses(witness)
328                raise NannyNag(start[0], msg, line)
329
330
331if __name__ == '__main__':
332    main()
333