#! /usr/bin/env python3

"""The Tab Nanny despises ambiguous indentation.  She knows no mercy.

tabnanny -- Detection of ambiguous indentation

For the time being this module is intended to be called as a script.
However it is possible to import it into an IDE and use the function
check() described below.

Warning: The API provided by this module is likely to change in future
releases; such changes may not be backward compatible.
"""
14
# Released to the public domain, by Tim Peters, 15 April 1998.

# XXX Note: this is now a standard library module.
# XXX The API needs to undergo changes however; the current code is too
# XXX script-like.  This will be addressed later.
20
__version__ = "6"

import os
import sys
import tokenize
# process_tokens() treats tokenize.NL (together with COMMENT) as a token
# to skip, so refuse to load against a tokenize module that lacks it.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
28
# Names exported by "from tabnanny import *".
__all__ = ["check", "NannyNag", "process_tokens"]
30
# Verbosity counter: main() adds 1 for each -v option.  check() prints
# extra progress and diagnostic detail when it is non-zero.
verbose = 0
# Counter bumped by main() for each -q option.  When non-zero (and verbose
# is zero) check() prints only the name of an offending file rather than
# the name, line number and line text.
filename_only = 0
33
def errprint(*args):
    """Write *args* to standard error as a single space-separated line.

    Every argument is converted with str(); the line is terminated by a
    newline.  Called without arguments, only the newline is written.
    """
    sys.stderr.write(" ".join(str(arg) for arg in args) + "\n")
40
def main():
    """Command-line entry point: parse options, then check every argument.

    Options (each may be given more than once):
        -q  add 1 to the module-level ``filename_only`` counter
        -v  add 1 to the module-level ``verbose`` counter

    An option-parsing error or a missing file/directory argument is
    reported through errprint() and the function returns without checking
    anything.  Otherwise check() is called on each remaining argument.
    """
    import getopt

    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    # Neither option takes a value, so the second tuple member is unused.
    for opt, _value in opts:
        if opt == '-q':
            filename_only += 1
        elif opt == '-v':
            verbose += 1
    if not args:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)
60
class NannyNag(Exception):
    """
    Raised by process_tokens() if detecting an ambiguous indent.
    Captured and handled in check().

    Attributes:
        lineno -- line number passed by the raiser
        msg    -- text describing the problem
        line   -- source line passed by the raiser
    """

    def __init__(self, lineno, msg, line):
        self.lineno, self.msg, self.line = lineno, msg, line

    def get_lineno(self):
        """Return the line number given at construction."""
        return self.lineno

    def get_msg(self):
        """Return the message given at construction."""
        return self.msg

    def get_line(self):
        """Return the source line given at construction."""
        return self.line
74
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using the print() function; I/O, token and
    indentation errors are reported on standard error via errprint().
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            # Recurse into real (non-symlink) subdirectories, and check any
            # entry whose last three characters normalise to ".py".
            if (os.path.isdir(fullname) and
                not os.path.islink(fullname) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = tokenize.open(file)
    except OSError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    if verbose > 1:
        print("checking %r ..." % file)

    # The context manager closes the file on every exit path below,
    # including the early returns inside the exception handlers.
    with f:
        try:
            process_tokens(tokenize.generate_tokens(f.readline))

        except tokenize.TokenError as msg:
            errprint("%r: Token Error: %s" % (file, msg))
            return

        except IndentationError as msg:
            errprint("%r: Indentation Error: %s" % (file, msg))
            return

        except NannyNag as nag:
            badline = nag.get_lineno()
            line = nag.get_line()
            if verbose:
                print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
                print("offending line: %r" % (line,))
                print(nag.get_msg())
            else:
                # Quote names containing spaces so the terse output stays
                # unambiguous.
                if ' ' in file:
                    file = '"' + file + '"'
                if filename_only:
                    print(file)
                else:
                    print(file, badline, repr(line))
            return

    if verbose:
        print("%r: Clean bill of health." % (file,))
135
class Whitespace:
    """Analysis of the leading whitespace (spaces and tabs) of a string.

    Instances can compute their indentation level for any tab size and be
    compared with one another across all tab sizes.
    """

    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        """Scan the leading spaces/tabs of *ws* and record the members above."""
        self.raw = ws
        space, tab = Whitespace.S, Whitespace.T
        count = []      # count[i]: tabs preceded by exactly i spaces
        pending = 0     # spaces seen since the last tab (or the start)
        width = 0       # leading whitespace characters consumed
        tabs = 0        # tabs among them
        for ch in ws:
            if ch == space:
                pending += 1
            elif ch == tab:
                tabs += 1
                # Grow count so that index `pending` exists.
                shortfall = pending + 1 - len(count)
                if shortfall > 0:
                    count.extend([0] * shortfall)
                count[pending] += 1
                pending = 0
            else:
                # First non-space, non-tab character ends the prefix.
                break
            width += 1
        self.n = width
        self.nt = tabs
        self.norm = tuple(count), pending
        self.is_simple = len(count) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        count, trailing = self.norm
        return max(len(count) - 1, trailing)

    def indent_level(self, tabsize):
        """Return the column reached by the leading whitespace for *tabsize*."""
        # Straightforward form:
        #   il = trailing + sum (i//ts + 1)*ts*count[i]
        # which simplifies to
        #   trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and i//ts*count[i] is 0 when i < ts, so the sum can start at ts.
        count, trailing = self.norm
        extra = sum(i // tabsize * count[i]
                    for i in range(tabsize, len(count)))
        return trailing + tabsize * (extra + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        limit = max(self.longest_run_of_spaces(),
                    other.longest_run_of_spaces()) + 1
        witnesses = []
        for ts in range(1, limit + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine != theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way.  I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        limit = max(self.longest_run_of_spaces(),
                    other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        return all(self.indent_level(ts) < other.indent_level(ts)
                   for ts in range(2, limit + 1))

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        limit = max(self.longest_run_of_spaces(),
                    other.longest_run_of_spaces()) + 1
        witnesses = []
        for ts in range(1, limit + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine >= theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses
269
def format_witnesses(w):
    """Return a phrase naming the tab sizes found in the witness list *w*.

    *w* is a sequence of tuples whose first item is a tab size, as built by
    Whitespace.not_equal_witness() / not_less_witness().  The result reads
    "at tab size N" for at most one witness and "at tab sizes N, M, ..."
    for several.
    """
    prefix = "at tab size"
    if len(w) > 1:
        prefix += "s"
    sizes = ', '.join(str(tup[0]) for tup in w)
    return prefix + " " + sizes
276
def process_tokens(tokens):
    """Scan a token stream and raise NannyNag on ambiguous indentation.

    *tokens* is an iterable of 5-tuples (type, string, start, end, line)
    such as those produced by tokenize.generate_tokens().  A stack of
    Whitespace objects tracks the enclosing indentation: every INDENT must
    be greater than the stack top, and the first statement after a NEWLINE
    or a run of DEDENTs must be equal to it, for every tab size.

    Raises:
        NannyNag: carrying the start row of the offending token, a message
            listing witnessing tab sizes, and the source line.
    """
    INDENT = tokenize.INDENT
    DEDENT = tokenize.DEDENT
    NEWLINE = tokenize.NEWLINE
    JUNK = tokenize.COMMENT, tokenize.NL
    indents = [Whitespace("")]
    check_equal = 0

    for (tok_type, token, start, end, line) in tokens:
        if tok_type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = 1

        elif tok_type == INDENT:
            check_equal = 0
            thisguy = Whitespace(token)
            if not indents[-1].less(thisguy):
                witness = indents[-1].not_less_witness(thisguy)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(thisguy)

        elif tok_type == DEDENT:
            # there's nothing we need to check here!  what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # Ouch!  An "assert check_equal" here would trigger if the last
            # line of the source is indented *and* lacks a newline -- then
            # DEDENTs pop out of thin air.
            check_equal = 1

            del indents[-1]

        elif check_equal and tok_type not in JUNK:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # "indents" stack was seeded
            check_equal = 0
            thisguy = Whitespace(line)
            if not indents[-1].equal(thisguy):
                witness = indents[-1].not_equal_witness(thisguy)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
330
331
# Run the command-line interface when executed as a script.
if __name__ == '__main__':
    main()
334