#! /usr/bin/env python

"""The Tab Nanny despises ambiguous indentation. She knows no mercy.

tabnanny -- Detection of ambiguous indentation

For the time being this module is intended to be called as a script.
However it is possible to import it into an IDE and use the function
check() described below.

Warning: The API provided by this module is likely to change in future
releases; such changes may not be backward compatible.
"""

# Released to the public domain, by Tim Peters, 15 April 1998.

# XXX Note: this is now a standard library module.
# XXX The API needs to undergo changes however; the current code is too
# XXX script-like.  This will be addressed later.

__version__ = "6"

import os
import sys
import getopt
import tokenize
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

__all__ = ["check", "NannyNag", "process_tokens"]

verbose = 0
filename_only = 0


def errprint(*args):
    """Write the arguments to stderr, space-separated, newline-terminated."""
    sep = ""
    for arg in args:
        sys.stderr.write(sep + str(arg))
        sep = " "
    sys.stderr.write("\n")


def main():
    """Command-line entry point.

    Parses -q (filename-only output) and -v (verbose) flags, then runs
    check() on each remaining argument (file or directory).
    """
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    for o, a in opts:
        if o == '-q':
            filename_only = filename_only + 1
        if o == '-v':
            verbose = verbose + 1
    if not args:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)


class NannyNag(Exception):
    """
    Raised by process_tokens() if detecting an ambiguous indent.
    Captured and handled in check().
    """
    def __init__(self, lineno, msg, line):
        self.lineno, self.msg, self.line = lineno, msg, line

    def get_lineno(self):
        """Return the 1-based line number of the offending line."""
        return self.lineno

    def get_msg(self):
        """Return the human-readable diagnostic message."""
        return self.msg

    def get_line(self):
        """Return the text of the offending line."""
        return self.line


def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using the print function.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if (os.path.isdir(fullname) and
                not os.path.islink(fullname) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = open(file)
    except IOError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    # Bug fix: the original version never closed f, leaking a file handle
    # on every path (success, NannyNag, and tokenizer errors alike).
    try:
        if verbose > 1:
            print("checking %r ..." % file)

        try:
            process_tokens(tokenize.generate_tokens(f.readline))

        except tokenize.TokenError as msg:
            errprint("%r: Token Error: %s" % (file, msg))
            return

        except IndentationError as msg:
            errprint("%r: Indentation Error: %s" % (file, msg))
            return

        except NannyNag as nag:
            badline = nag.get_lineno()
            line = nag.get_line()
            if verbose:
                print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
                print("offending line: %r" % (line,))
                print(nag.get_msg())
            else:
                # Quote names containing spaces so the output stays parseable.
                if ' ' in file:
                    file = '"' + file + '"'
                if filename_only:
                    print(file)
                else:
                    print(file, badline, repr(line))
            return

        if verbose:
            print("%r: Clean bill of health." % (file,))
    finally:
        f.close()


class Whitespace:
    """Normal-form representation of a leading-whitespace string.

    Supports comparing two indentation strings across *all* possible tab
    sizes at once, which is the heart of the ambiguity detection.
    """

    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n = n + 1
                b = b + 1
            elif ch == T:
                n = n + 1
                nt = nt + 1
                if b >= len(count):
                    count = count + [0] * (b - len(count) + 1)
                count[b] = count[b] + 1
                b = 0
            else:
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        count, trailing = self.norm
        return max(len(count) - 1, trailing)

    def indent_level(self, tabsize):
        """Return the display column this indentation reaches at tabsize."""
        # count, il = self.norm
        # for i in range(len(count)):
        #    if count[i]:
        #        il = il + (i//tabsize + 1)*tabsize * count[i]
        # return il

        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        # trailing + ts * sum (i//ts + 1)*count[i] =
        # trailing + ts * sum i//ts*count[i] + count[i] =
        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts

        count, trailing = self.norm
        il = 0
        for i in range(tabsize, len(count)):
            # Bug fix: must be floor division; the original relied on
            # Python 2's integer "/", which is true division in Python 3.
            il = il + i // tabsize * count[i]
        return trailing + tabsize * (il + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            if self.indent_level(ts) != other.indent_level(ts):
                a.append((ts,
                          self.indent_level(ts),
                          other.indent_level(ts)))
        return a

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way.  I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n + 1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            if self.indent_level(ts) >= other.indent_level(ts):
                a.append((ts,
                          self.indent_level(ts),
                          other.indent_level(ts)))
        return a


def format_witnesses(w):
    """Format a witness list from not_*_witness() as a readable phrase."""
    firsts = (str(tup[0]) for tup in w)
    prefix = "at tab size"
    if len(w) > 1:
        prefix = prefix + "s"
    return prefix + " " + ', '.join(firsts)


def process_tokens(tokens):
    """Scan a token stream and raise NannyNag on ambiguous indentation.

    tokens is an iterable of 5-tuples as produced by
    tokenize.generate_tokens().
    """
    INDENT = tokenize.INDENT
    DEDENT = tokenize.DEDENT
    NEWLINE = tokenize.NEWLINE
    JUNK = tokenize.COMMENT, tokenize.NL
    indents = [Whitespace("")]
    check_equal = 0

    for (tok_type, token, start, end, line) in tokens:
        if tok_type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = 1

        elif tok_type == INDENT:
            check_equal = 0
            thisguy = Whitespace(token)
            if not indents[-1].less(thisguy):
                witness = indents[-1].not_less_witness(thisguy)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(thisguy)

        elif tok_type == DEDENT:
            # there's nothing we need to check here!  what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # Ouch!  This assert triggers if the last line of the source
            # is indented *and* lacks a newline -- then DEDENTs pop out
            # of thin air.
            # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
            check_equal = 1

            del indents[-1]

        elif check_equal and tok_type not in JUNK:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # "indents" stack was seeded
            check_equal = 0
            thisguy = Whitespace(line)
            if not indents[-1].equal(thisguy):
                witness = indents[-1].not_equal_witness(thisguy)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)


if __name__ == '__main__':
    main()