• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/env python3
2
3"""The Tab Nanny despises ambiguous indentation.  She knows no mercy.
4
5tabnanny -- Detection of ambiguous indentation
6
7For the time being this module is intended to be called as a script.
8However it is possible to import it into an IDE and use the function
9check() described below.
10
11Warning: The API provided by this module is likely to change in future
12releases; such changes may not be backward compatible.
13"""
14
15# Released to the public domain, by Tim Peters, 15 April 1998.
16
17# XXX Note: this is now a standard library module.
18# XXX The API needs to undergo changes however; the current code is too
19# XXX script-like.  This will be addressed later.
20
21__version__ = "6"
22
23import os
24import sys
25import tokenize
26
27__all__ = ["check", "NannyNag", "process_tokens"]
28
29verbose = 0
30filename_only = 0
31
32def errprint(*args):
33    sep = ""
34    for arg in args:
35        sys.stderr.write(sep + str(arg))
36        sep = " "
37    sys.stderr.write("\n")
38    sys.exit(1)
39
40def main():
41    import getopt
42
43    global verbose, filename_only
44    try:
45        opts, args = getopt.getopt(sys.argv[1:], "qv")
46    except getopt.error as msg:
47        errprint(msg)
48    for o, a in opts:
49        if o == '-q':
50            filename_only = filename_only + 1
51        if o == '-v':
52            verbose = verbose + 1
53    if not args:
54        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
55    for arg in args:
56        check(arg)
57
58class NannyNag(Exception):
59    """
60    Raised by process_tokens() if detecting an ambiguous indent.
61    Captured and handled in check().
62    """
63    def __init__(self, lineno, msg, line):
64        self.lineno, self.msg, self.line = lineno, msg, line
65    def get_lineno(self):
66        return self.lineno
67    def get_msg(self):
68        return self.msg
69    def get_line(self):
70        return self.line
71
72def check(file):
73    """check(file_or_dir)
74
75    If file_or_dir is a directory and not a symbolic link, then recursively
76    descend the directory tree named by file_or_dir, checking all .py files
77    along the way. If file_or_dir is an ordinary Python source file, it is
78    checked for whitespace related problems. The diagnostic messages are
79    written to standard output using the print statement.
80    """
81
82    if os.path.isdir(file) and not os.path.islink(file):
83        if verbose:
84            print("%r: listing directory" % (file,))
85        names = os.listdir(file)
86        for name in names:
87            fullname = os.path.join(file, name)
88            if (os.path.isdir(fullname) and
89                not os.path.islink(fullname) or
90                os.path.normcase(name[-3:]) == ".py"):
91                check(fullname)
92        return
93
94    try:
95        f = tokenize.open(file)
96    except OSError as msg:
97        errprint("%r: I/O Error: %s" % (file, msg))
98        return
99
100    if verbose > 1:
101        print("checking %r ..." % file)
102
103    try:
104        process_tokens(tokenize.generate_tokens(f.readline))
105
106    except tokenize.TokenError as msg:
107        errprint("%r: Token Error: %s" % (file, msg))
108        return
109
110    except IndentationError as msg:
111        errprint("%r: Indentation Error: %s" % (file, msg))
112        return
113
114    except SyntaxError as msg:
115        errprint("%r: Syntax Error: %s" % (file, msg))
116        return
117
118    except NannyNag as nag:
119        badline = nag.get_lineno()
120        line = nag.get_line()
121        if verbose:
122            print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
123            print("offending line: %r" % (line,))
124            print(nag.get_msg())
125        else:
126            if ' ' in file: file = '"' + file + '"'
127            if filename_only: print(file)
128            else: print(file, badline, repr(line))
129        return
130
131    finally:
132        f.close()
133
134    if verbose:
135        print("%r: Clean bill of health." % (file,))
136
137class Whitespace:
138    # the characters used for space and tab
139    S, T = ' \t'
140
141    # members:
142    #   raw
143    #       the original string
144    #   n
145    #       the number of leading whitespace characters in raw
146    #   nt
147    #       the number of tabs in raw[:n]
148    #   norm
149    #       the normal form as a pair (count, trailing), where:
150    #       count
151    #           a tuple such that raw[:n] contains count[i]
152    #           instances of S * i + T
153    #       trailing
154    #           the number of trailing spaces in raw[:n]
155    #       It's A Theorem that m.indent_level(t) ==
156    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
157    #   is_simple
158    #       true iff raw[:n] is of the form (T*)(S*)
159
160    def __init__(self, ws):
161        self.raw  = ws
162        S, T = Whitespace.S, Whitespace.T
163        count = []
164        b = n = nt = 0
165        for ch in self.raw:
166            if ch == S:
167                n = n + 1
168                b = b + 1
169            elif ch == T:
170                n = n + 1
171                nt = nt + 1
172                if b >= len(count):
173                    count = count + [0] * (b - len(count) + 1)
174                count[b] = count[b] + 1
175                b = 0
176            else:
177                break
178        self.n    = n
179        self.nt   = nt
180        self.norm = tuple(count), b
181        self.is_simple = len(count) <= 1
182
183    # return length of longest contiguous run of spaces (whether or not
184    # preceding a tab)
185    def longest_run_of_spaces(self):
186        count, trailing = self.norm
187        return max(len(count)-1, trailing)
188
189    def indent_level(self, tabsize):
190        # count, il = self.norm
191        # for i in range(len(count)):
192        #    if count[i]:
193        #        il = il + (i//tabsize + 1)*tabsize * count[i]
194        # return il
195
196        # quicker:
197        # il = trailing + sum (i//ts + 1)*ts*count[i] =
198        # trailing + ts * sum (i//ts + 1)*count[i] =
199        # trailing + ts * sum i//ts*count[i] + count[i] =
200        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
201        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
202        # and note that i//ts*count[i] is 0 when i < ts
203
204        count, trailing = self.norm
205        il = 0
206        for i in range(tabsize, len(count)):
207            il = il + i//tabsize * count[i]
208        return trailing + tabsize * (il + self.nt)
209
210    # return true iff self.indent_level(t) == other.indent_level(t)
211    # for all t >= 1
212    def equal(self, other):
213        return self.norm == other.norm
214
215    # return a list of tuples (ts, i1, i2) such that
216    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
217    # Intended to be used after not self.equal(other) is known, in which
218    # case it will return at least one witnessing tab size.
219    def not_equal_witness(self, other):
220        n = max(self.longest_run_of_spaces(),
221                other.longest_run_of_spaces()) + 1
222        a = []
223        for ts in range(1, n+1):
224            if self.indent_level(ts) != other.indent_level(ts):
225                a.append( (ts,
226                           self.indent_level(ts),
227                           other.indent_level(ts)) )
228        return a
229
230    # Return True iff self.indent_level(t) < other.indent_level(t)
231    # for all t >= 1.
232    # The algorithm is due to Vincent Broman.
233    # Easy to prove it's correct.
234    # XXXpost that.
235    # Trivial to prove n is sharp (consider T vs ST).
236    # Unknown whether there's a faster general way.  I suspected so at
237    # first, but no longer.
238    # For the special (but common!) case where M and N are both of the
239    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
240    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
241    # XXXwrite that up.
242    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
243    def less(self, other):
244        if self.n >= other.n:
245            return False
246        if self.is_simple and other.is_simple:
247            return self.nt <= other.nt
248        n = max(self.longest_run_of_spaces(),
249                other.longest_run_of_spaces()) + 1
250        # the self.n >= other.n test already did it for ts=1
251        for ts in range(2, n+1):
252            if self.indent_level(ts) >= other.indent_level(ts):
253                return False
254        return True
255
256    # return a list of tuples (ts, i1, i2) such that
257    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
258    # Intended to be used after not self.less(other) is known, in which
259    # case it will return at least one witnessing tab size.
260    def not_less_witness(self, other):
261        n = max(self.longest_run_of_spaces(),
262                other.longest_run_of_spaces()) + 1
263        a = []
264        for ts in range(1, n+1):
265            if self.indent_level(ts) >= other.indent_level(ts):
266                a.append( (ts,
267                           self.indent_level(ts),
268                           other.indent_level(ts)) )
269        return a
270
271def format_witnesses(w):
272    firsts = (str(tup[0]) for tup in w)
273    prefix = "at tab size"
274    if len(w) > 1:
275        prefix = prefix + "s"
276    return prefix + " " + ', '.join(firsts)
277
278def process_tokens(tokens):
279    try:
280        _process_tokens(tokens)
281    except TabError as e:
282        raise NannyNag(e.lineno, e.msg, e.text)
283
284def _process_tokens(tokens):
285    INDENT = tokenize.INDENT
286    DEDENT = tokenize.DEDENT
287    NEWLINE = tokenize.NEWLINE
288    JUNK = tokenize.COMMENT, tokenize.NL
289    indents = [Whitespace("")]
290    check_equal = 0
291
292    for (type, token, start, end, line) in tokens:
293        if type == NEWLINE:
294            # a program statement, or ENDMARKER, will eventually follow,
295            # after some (possibly empty) run of tokens of the form
296            #     (NL | COMMENT)* (INDENT | DEDENT+)?
297            # If an INDENT appears, setting check_equal is wrong, and will
298            # be undone when we see the INDENT.
299            check_equal = 1
300
301        elif type == INDENT:
302            check_equal = 0
303            thisguy = Whitespace(token)
304            if not indents[-1].less(thisguy):
305                witness = indents[-1].not_less_witness(thisguy)
306                msg = "indent not greater e.g. " + format_witnesses(witness)
307                raise NannyNag(start[0], msg, line)
308            indents.append(thisguy)
309
310        elif type == DEDENT:
311            # there's nothing we need to check here!  what's important is
312            # that when the run of DEDENTs ends, the indentation of the
313            # program statement (or ENDMARKER) that triggered the run is
314            # equal to what's left at the top of the indents stack
315
316            # Ouch!  This assert triggers if the last line of the source
317            # is indented *and* lacks a newline -- then DEDENTs pop out
318            # of thin air.
319            # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
320            check_equal = 1
321
322            del indents[-1]
323
324        elif check_equal and type not in JUNK:
325            # this is the first "real token" following a NEWLINE, so it
326            # must be the first token of the next program statement, or an
327            # ENDMARKER; the "line" argument exposes the leading whitespace
328            # for this statement; in the case of ENDMARKER, line is an empty
329            # string, so will properly match the empty string with which the
330            # "indents" stack was seeded
331            check_equal = 0
332            thisguy = Whitespace(line)
333            if not indents[-1].equal(thisguy):
334                witness = indents[-1].not_equal_witness(thisguy)
335                msg = "indent not equal e.g. " + format_witnesses(witness)
336                raise NannyNag(start[0], msg, line)
337
338
339if __name__ == '__main__':
340    main()
341