1#! /usr/bin/env python3 2 3"""The Tab Nanny despises ambiguous indentation. She knows no mercy. 4 5tabnanny -- Detection of ambiguous indentation 6 7For the time being this module is intended to be called as a script. 8However it is possible to import it into an IDE and use the function 9check() described below. 10 11Warning: The API provided by this module is likely to change in future 12releases; such changes may not be backward compatible. 13""" 14 15# Released to the public domain, by Tim Peters, 15 April 1998. 16 17# XXX Note: this is now a standard library module. 18# XXX The API needs to undergo changes however; the current code is too 19# XXX script-like. This will be addressed later. 20 21__version__ = "6" 22 23import os 24import sys 25import tokenize 26 27__all__ = ["check", "NannyNag", "process_tokens"] 28 29verbose = 0 30filename_only = 0 31 32def errprint(*args): 33 sep = "" 34 for arg in args: 35 sys.stderr.write(sep + str(arg)) 36 sep = " " 37 sys.stderr.write("\n") 38 sys.exit(1) 39 40def main(): 41 import getopt 42 43 global verbose, filename_only 44 try: 45 opts, args = getopt.getopt(sys.argv[1:], "qv") 46 except getopt.error as msg: 47 errprint(msg) 48 for o, a in opts: 49 if o == '-q': 50 filename_only = filename_only + 1 51 if o == '-v': 52 verbose = verbose + 1 53 if not args: 54 errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...") 55 for arg in args: 56 check(arg) 57 58class NannyNag(Exception): 59 """ 60 Raised by process_tokens() if detecting an ambiguous indent. 61 Captured and handled in check(). 62 """ 63 def __init__(self, lineno, msg, line): 64 self.lineno, self.msg, self.line = lineno, msg, line 65 def get_lineno(self): 66 return self.lineno 67 def get_msg(self): 68 return self.msg 69 def get_line(self): 70 return self.line 71 72def check(file): 73 """check(file_or_dir) 74 75 If file_or_dir is a directory and not a symbolic link, then recursively 76 descend the directory tree named by file_or_dir, checking all .py files 77 along the way. If file_or_dir is an ordinary Python source file, it is 78 checked for whitespace related problems. The diagnostic messages are 79 written to standard output using the print statement. 80 """ 81 82 if os.path.isdir(file) and not os.path.islink(file): 83 if verbose: 84 print("%r: listing directory" % (file,)) 85 names = os.listdir(file) 86 for name in names: 87 fullname = os.path.join(file, name) 88 if (os.path.isdir(fullname) and 89 not os.path.islink(fullname) or 90 os.path.normcase(name[-3:]) == ".py"): 91 check(fullname) 92 return 93 94 try: 95 f = tokenize.open(file) 96 except OSError as msg: 97 errprint("%r: I/O Error: %s" % (file, msg)) 98 return 99 100 if verbose > 1: 101 print("checking %r ..." % file) 102 103 try: 104 process_tokens(tokenize.generate_tokens(f.readline)) 105 106 except tokenize.TokenError as msg: 107 errprint("%r: Token Error: %s" % (file, msg)) 108 return 109 110 except IndentationError as msg: 111 errprint("%r: Indentation Error: %s" % (file, msg)) 112 return 113 114 except SyntaxError as msg: 115 errprint("%r: Syntax Error: %s" % (file, msg)) 116 return 117 118 except NannyNag as nag: 119 badline = nag.get_lineno() 120 line = nag.get_line() 121 if verbose: 122 print("%r: *** Line %d: trouble in tab city! ***" % (file, badline)) 123 print("offending line: %r" % (line,)) 124 print(nag.get_msg()) 125 else: 126 if ' ' in file: file = '"' + file + '"' 127 if filename_only: print(file) 128 else: print(file, badline, repr(line)) 129 return 130 131 finally: 132 f.close() 133 134 if verbose: 135 print("%r: Clean bill of health." % (file,)) 136 137class Whitespace: 138 # the characters used for space and tab 139 S, T = ' \t' 140 141 # members: 142 # raw 143 # the original string 144 # n 145 # the number of leading whitespace characters in raw 146 # nt 147 # the number of tabs in raw[:n] 148 # norm 149 # the normal form as a pair (count, trailing), where: 150 # count 151 # a tuple such that raw[:n] contains count[i] 152 # instances of S * i + T 153 # trailing 154 # the number of trailing spaces in raw[:n] 155 # It's A Theorem that m.indent_level(t) == 156 # n.indent_level(t) for all t >= 1 iff m.norm == n.norm. 157 # is_simple 158 # true iff raw[:n] is of the form (T*)(S*) 159 160 def __init__(self, ws): 161 self.raw = ws 162 S, T = Whitespace.S, Whitespace.T 163 count = [] 164 b = n = nt = 0 165 for ch in self.raw: 166 if ch == S: 167 n = n + 1 168 b = b + 1 169 elif ch == T: 170 n = n + 1 171 nt = nt + 1 172 if b >= len(count): 173 count = count + [0] * (b - len(count) + 1) 174 count[b] = count[b] + 1 175 b = 0 176 else: 177 break 178 self.n = n 179 self.nt = nt 180 self.norm = tuple(count), b 181 self.is_simple = len(count) <= 1 182 183 # return length of longest contiguous run of spaces (whether or not 184 # preceding a tab) 185 def longest_run_of_spaces(self): 186 count, trailing = self.norm 187 return max(len(count)-1, trailing) 188 189 def indent_level(self, tabsize): 190 # count, il = self.norm 191 # for i in range(len(count)): 192 # if count[i]: 193 # il = il + (i//tabsize + 1)*tabsize * count[i] 194 # return il 195 196 # quicker: 197 # il = trailing + sum (i//ts + 1)*ts*count[i] = 198 # trailing + ts * sum (i//ts + 1)*count[i] = 199 # trailing + ts * sum i//ts*count[i] + count[i] = 200 # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] = 201 # trailing + ts * [(sum i//ts*count[i]) + num_tabs] 202 # and note that i//ts*count[i] is 0 when i < ts 203 204 count, trailing = self.norm 205 il = 0 206 for i in range(tabsize, len(count)): 207 il = il + i//tabsize * count[i] 208 return trailing + tabsize * (il + self.nt) 209 210 # return true iff self.indent_level(t) == other.indent_level(t) 211 # for all t >= 1 212 def equal(self, other): 213 return self.norm == other.norm 214 215 # return a list of tuples (ts, i1, i2) such that 216 # i1 == self.indent_level(ts) != other.indent_level(ts) == i2. 217 # Intended to be used after not self.equal(other) is known, in which 218 # case it will return at least one witnessing tab size. 219 def not_equal_witness(self, other): 220 n = max(self.longest_run_of_spaces(), 221 other.longest_run_of_spaces()) + 1 222 a = [] 223 for ts in range(1, n+1): 224 if self.indent_level(ts) != other.indent_level(ts): 225 a.append( (ts, 226 self.indent_level(ts), 227 other.indent_level(ts)) ) 228 return a 229 230 # Return True iff self.indent_level(t) < other.indent_level(t) 231 # for all t >= 1. 232 # The algorithm is due to Vincent Broman. 233 # Easy to prove it's correct. 234 # XXXpost that. 235 # Trivial to prove n is sharp (consider T vs ST). 236 # Unknown whether there's a faster general way. I suspected so at 237 # first, but no longer. 238 # For the special (but common!) case where M and N are both of the 239 # form (T*)(S*), M.less(N) iff M.len() < N.len() and 240 # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded. 241 # XXXwrite that up. 242 # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1. 243 def less(self, other): 244 if self.n >= other.n: 245 return False 246 if self.is_simple and other.is_simple: 247 return self.nt <= other.nt 248 n = max(self.longest_run_of_spaces(), 249 other.longest_run_of_spaces()) + 1 250 # the self.n >= other.n test already did it for ts=1 251 for ts in range(2, n+1): 252 if self.indent_level(ts) >= other.indent_level(ts): 253 return False 254 return True 255 256 # return a list of tuples (ts, i1, i2) such that 257 # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2. 258 # Intended to be used after not self.less(other) is known, in which 259 # case it will return at least one witnessing tab size. 260 def not_less_witness(self, other): 261 n = max(self.longest_run_of_spaces(), 262 other.longest_run_of_spaces()) + 1 263 a = [] 264 for ts in range(1, n+1): 265 if self.indent_level(ts) >= other.indent_level(ts): 266 a.append( (ts, 267 self.indent_level(ts), 268 other.indent_level(ts)) ) 269 return a 270 271def format_witnesses(w): 272 firsts = (str(tup[0]) for tup in w) 273 prefix = "at tab size" 274 if len(w) > 1: 275 prefix = prefix + "s" 276 return prefix + " " + ', '.join(firsts) 277 278def process_tokens(tokens): 279 try: 280 _process_tokens(tokens) 281 except TabError as e: 282 raise NannyNag(e.lineno, e.msg, e.text) 283 284def _process_tokens(tokens): 285 INDENT = tokenize.INDENT 286 DEDENT = tokenize.DEDENT 287 NEWLINE = tokenize.NEWLINE 288 JUNK = tokenize.COMMENT, tokenize.NL 289 indents = [Whitespace("")] 290 check_equal = 0 291 292 for (type, token, start, end, line) in tokens: 293 if type == NEWLINE: 294 # a program statement, or ENDMARKER, will eventually follow, 295 # after some (possibly empty) run of tokens of the form 296 # (NL | COMMENT)* (INDENT | DEDENT+)? 297 # If an INDENT appears, setting check_equal is wrong, and will 298 # be undone when we see the INDENT. 299 check_equal = 1 300 301 elif type == INDENT: 302 check_equal = 0 303 thisguy = Whitespace(token) 304 if not indents[-1].less(thisguy): 305 witness = indents[-1].not_less_witness(thisguy) 306 msg = "indent not greater e.g. " + format_witnesses(witness) 307 raise NannyNag(start[0], msg, line) 308 indents.append(thisguy) 309 310 elif type == DEDENT: 311 # there's nothing we need to check here! what's important is 312 # that when the run of DEDENTs ends, the indentation of the 313 # program statement (or ENDMARKER) that triggered the run is 314 # equal to what's left at the top of the indents stack 315 316 # Ouch! This assert triggers if the last line of the source 317 # is indented *and* lacks a newline -- then DEDENTs pop out 318 # of thin air. 319 # assert check_equal # else no earlier NEWLINE, or an earlier INDENT 320 check_equal = 1 321 322 del indents[-1] 323 324 elif check_equal and type not in JUNK: 325 # this is the first "real token" following a NEWLINE, so it 326 # must be the first token of the next program statement, or an 327 # ENDMARKER; the "line" argument exposes the leading whitespace 328 # for this statement; in the case of ENDMARKER, line is an empty 329 # string, so will properly match the empty string with which the 330 # "indents" stack was seeded 331 check_equal = 0 332 thisguy = Whitespace(line) 333 if not indents[-1].equal(thisguy): 334 witness = indents[-1].not_equal_witness(thisguy) 335 msg = "indent not equal e.g. " + format_witnesses(witness) 336 raise NannyNag(start[0], msg, line) 337 338 339if __name__ == '__main__': 340 main() 341