#! /usr/bin/env python3

"""The Tab Nanny despises ambiguous indentation. She knows no mercy.

tabnanny -- Detection of ambiguous indentation

For the time being this module is intended to be called as a script.
However it is possible to import it into an IDE and use the function
check() described below.

Warning: The API provided by this module is likely to change in future
releases; such changes may not be backward compatible.
"""

# Released to the public domain, by Tim Peters, 15 April 1998.

# XXX Note: this is now a standard library module.
# XXX The API needs to undergo changes however; the current code is too
# XXX script-like. This will be addressed later.

__version__ = "6"

import os
import sys
import getopt
import tokenize
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

__all__ = ["check", "NannyNag", "process_tokens"]

# Module-level switches, incremented by main() for each -v / -q option and
# read by check() to decide how much to print.
verbose = 0
filename_only = 0


def errprint(*args):
    """Write the str() of each argument to sys.stderr.

    Arguments are separated by single spaces and followed by a newline.
    """
    sys.stderr.write(" ".join(map(str, args)) + "\n")


def main():
    """Script entry point: parse sys.argv and check() every argument.

    Each -q increments the module-level filename_only and each -v increments
    the module-level verbose.  An option error or a missing file argument is
    reported on stderr and the function returns without checking anything.
    """
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    for opt, _ in opts:
        if opt == '-q':
            filename_only = filename_only + 1
        if opt == '-v':
            verbose = verbose + 1
    if not args:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)


class NannyNag(Exception):
    """
    Raised by process_tokens() if detecting an ambiguous indent.
    Captured and handled in check().
    """

    def __init__(self, lineno, msg, line):
        self.lineno, self.msg, self.line = lineno, msg, line

    def get_lineno(self):
        """Return the line number that was passed to the constructor."""
        return self.lineno

    def get_msg(self):
        """Return the message that was passed to the constructor."""
        return self.msg

    def get_line(self):
        """Return the source line that was passed to the constructor."""
        return self.line


def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using the print() function; problems that
    prevent a file from being opened or tokenized are reported on standard
    error via errprint().  Always returns None.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            # Recurse into real sub-directories and into anything whose
            # name ends in ".py" (case-normalized for the platform).
            if (os.path.isdir(fullname) and
                    not os.path.islink(fullname) or
                    os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = tokenize.open(file)
    except OSError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return
    except SyntaxError as msg:
        # tokenize.open() detects the source encoding up front and raises
        # SyntaxError for an unknown or invalid coding declaration.  Report
        # it instead of letting it abort a whole directory scan.
        errprint("%r: Token Error: %s" % (file, msg))
        return

    with f:
        if verbose > 1:
            print("checking %r ..." % (file,))

        try:
            process_tokens(tokenize.generate_tokens(f.readline))

        except tokenize.TokenError as msg:
            errprint("%r: Token Error: %s" % (file, msg))
            return

        except IndentationError as msg:
            errprint("%r: Indentation Error: %s" % (file, msg))
            return

        except (SyntaxError, UnicodeDecodeError) as msg:
            # Must come after IndentationError (a SyntaxError subclass).
            # Covers tokenizers that raise a plain SyntaxError and files
            # whose bytes cannot be decoded with the detected encoding.
            errprint("%r: Token Error: %s" % (file, msg))
            return

        except NannyNag as nag:
            badline = nag.get_lineno()
            line = nag.get_line()
            if verbose:
                print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
                print("offending line: %r" % (line,))
                print(nag.get_msg())
            else:
                # Quote names containing spaces so the terse output stays
                # splittable on whitespace.
                if ' ' in file:
                    file = '"' + file + '"'
                if filename_only:
                    print(file)
                else:
                    print(file, badline, repr(line))
            return

    if verbose:
        print("%r: Clean bill of health." % (file,))


class Whitespace:
    """The leading whitespace of a line, normalized for comparison across
    every possible tab size."""

    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        # b: length of the current run of spaces; n: whitespace characters
        # seen so far; nt: tabs seen so far.
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n += 1
                b += 1
            elif ch == T:
                n += 1
                nt += 1
                if b >= len(count):
                    # grow count so that index b exists
                    count = count + [0] * (b - len(count) + 1)
                count[b] += 1
                b = 0
            else:
                # first non-whitespace character ends the indentation
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        count, trailing = self.norm
        return max(len(count)-1, trailing)

    # return the column the indentation reaches when tabs advance to the
    # next multiple of tabsize
    def indent_level(self, tabsize):
        # The straightforward computation would be:
        #   count, il = self.norm
        #   for i in range(len(count)):
        #       if count[i]:
        #           il = il + (i//tabsize + 1)*tabsize * count[i]
        #   return il

        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        # trailing + ts * sum (i//ts + 1)*count[i] =
        # trailing + ts * sum i//ts*count[i] + count[i] =
        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts

        count, trailing = self.norm
        il = sum(i // tabsize * count[i]
                 for i in range(tabsize, len(count)))
        return trailing + tabsize * (il + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            mine, theirs = self.indent_level(ts), other.indent_level(ts)
            if mine != theirs:
                a.append((ts, mine, theirs))
        return a

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way. I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n+1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            mine, theirs = self.indent_level(ts), other.indent_level(ts)
            if mine >= theirs:
                a.append((ts, mine, theirs))
        return a


def format_witnesses(w):
    """Format the tab sizes from a list of witness tuples for a message.

    w is a list of tuples whose first item is a tab size, as returned by
    Whitespace.not_equal_witness() / not_less_witness().  The result is
    "at tab size N" for one witness, "at tab sizes N, M, ..." otherwise.
    """
    firsts = (str(tup[0]) for tup in w)
    prefix = "at tab size"
    if len(w) > 1:
        prefix = prefix + "s"
    return prefix + " " + ', '.join(firsts)


def process_tokens(tokens):
    """Check a token stream for ambiguous indentation.

    tokens is an iterable of 5-tuples (type, string, start, end, line) as
    produced by tokenize.generate_tokens().  Returns None if nothing
    ambiguous is found.

    Raises NannyNag for the first ambiguous indent.  A TabError raised by
    the tokenizer itself while the stream is being consumed reports the
    same kind of problem, so it is translated into a NannyNag carrying the
    error's line number, message and source text.
    """
    try:
        _process_tokens(tokens)
    except TabError as e:
        raise NannyNag(e.lineno, e.msg, e.text) from e


def _process_tokens(tokens):
    # Core of process_tokens(): tracks a stack of Whitespace objects for
    # the open indentation levels and raises NannyNag on the first indent
    # that is not unambiguously greater than / equal to the expected one.
    INDENT = tokenize.INDENT
    DEDENT = tokenize.DEDENT
    NEWLINE = tokenize.NEWLINE
    JUNK = tokenize.COMMENT, tokenize.NL
    indents = [Whitespace("")]
    check_equal = 0

    for (tok_type, token, start, end, line) in tokens:
        if tok_type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = 1

        elif tok_type == INDENT:
            check_equal = 0
            thisguy = Whitespace(token)
            if not indents[-1].less(thisguy):
                witness = indents[-1].not_less_witness(thisguy)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(thisguy)

        elif tok_type == DEDENT:
            # there's nothing we need to check here! what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # Ouch! This assert triggers if the last line of the source
            # is indented *and* lacks a newline -- then DEDENTs pop out
            # of thin air.
            # assert check_equal # else no earlier NEWLINE, or an earlier INDENT
            check_equal = 1

            del indents[-1]

        elif check_equal and tok_type not in JUNK:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # "indents" stack was seeded
            check_equal = 0
            thisguy = Whitespace(line)
            if not indents[-1].equal(thisguy):
                witness = indents[-1].not_equal_witness(thisguy)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)


if __name__ == '__main__':
    main()