1#! /usr/bin/env python3 2 3"""The Tab Nanny despises ambiguous indentation. She knows no mercy. 4 5tabnanny -- Detection of ambiguous indentation 6 7For the time being this module is intended to be called as a script. 8However it is possible to import it into an IDE and use the function 9check() described below. 10 11Warning: The API provided by this module is likely to change in future 12releases; such changes may not be backward compatible. 13""" 14 15# Released to the public domain, by Tim Peters, 15 April 1998. 16 17# XXX Note: this is now a standard library module. 18# XXX The API needs to undergo changes however; the current code is too 19# XXX script-like. This will be addressed later. 20 21__version__ = "6" 22 23import os 24import sys 25import tokenize 26if not hasattr(tokenize, 'NL'): 27 raise ValueError("tokenize.NL doesn't exist -- tokenize module too old") 28 29__all__ = ["check", "NannyNag", "process_tokens"] 30 31verbose = 0 32filename_only = 0 33 34def errprint(*args): 35 sep = "" 36 for arg in args: 37 sys.stderr.write(sep + str(arg)) 38 sep = " " 39 sys.stderr.write("\n") 40 41def main(): 42 import getopt 43 44 global verbose, filename_only 45 try: 46 opts, args = getopt.getopt(sys.argv[1:], "qv") 47 except getopt.error as msg: 48 errprint(msg) 49 return 50 for o, a in opts: 51 if o == '-q': 52 filename_only = filename_only + 1 53 if o == '-v': 54 verbose = verbose + 1 55 if not args: 56 errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...") 57 return 58 for arg in args: 59 check(arg) 60 61class NannyNag(Exception): 62 """ 63 Raised by process_tokens() if detecting an ambiguous indent. 64 Captured and handled in check(). 65 """ 66 def __init__(self, lineno, msg, line): 67 self.lineno, self.msg, self.line = lineno, msg, line 68 def get_lineno(self): 69 return self.lineno 70 def get_msg(self): 71 return self.msg 72 def get_line(self): 73 return self.line 74 75def check(file): 76 """check(file_or_dir) 77 78 If file_or_dir is a directory and not a symbolic link, then recursively 79 descend the directory tree named by file_or_dir, checking all .py files 80 along the way. If file_or_dir is an ordinary Python source file, it is 81 checked for whitespace related problems. The diagnostic messages are 82 written to standard output using the print statement. 83 """ 84 85 if os.path.isdir(file) and not os.path.islink(file): 86 if verbose: 87 print("%r: listing directory" % (file,)) 88 names = os.listdir(file) 89 for name in names: 90 fullname = os.path.join(file, name) 91 if (os.path.isdir(fullname) and 92 not os.path.islink(fullname) or 93 os.path.normcase(name[-3:]) == ".py"): 94 check(fullname) 95 return 96 97 try: 98 f = tokenize.open(file) 99 except OSError as msg: 100 errprint("%r: I/O Error: %s" % (file, msg)) 101 return 102 103 if verbose > 1: 104 print("checking %r ..." % file) 105 106 try: 107 process_tokens(tokenize.generate_tokens(f.readline)) 108 109 except tokenize.TokenError as msg: 110 errprint("%r: Token Error: %s" % (file, msg)) 111 return 112 113 except IndentationError as msg: 114 errprint("%r: Indentation Error: %s" % (file, msg)) 115 return 116 117 except NannyNag as nag: 118 badline = nag.get_lineno() 119 line = nag.get_line() 120 if verbose: 121 print("%r: *** Line %d: trouble in tab city! ***" % (file, badline)) 122 print("offending line: %r" % (line,)) 123 print(nag.get_msg()) 124 else: 125 if ' ' in file: file = '"' + file + '"' 126 if filename_only: print(file) 127 else: print(file, badline, repr(line)) 128 return 129 130 finally: 131 f.close() 132 133 if verbose: 134 print("%r: Clean bill of health." % (file,)) 135 136class Whitespace: 137 # the characters used for space and tab 138 S, T = ' \t' 139 140 # members: 141 # raw 142 # the original string 143 # n 144 # the number of leading whitespace characters in raw 145 # nt 146 # the number of tabs in raw[:n] 147 # norm 148 # the normal form as a pair (count, trailing), where: 149 # count 150 # a tuple such that raw[:n] contains count[i] 151 # instances of S * i + T 152 # trailing 153 # the number of trailing spaces in raw[:n] 154 # It's A Theorem that m.indent_level(t) == 155 # n.indent_level(t) for all t >= 1 iff m.norm == n.norm. 156 # is_simple 157 # true iff raw[:n] is of the form (T*)(S*) 158 159 def __init__(self, ws): 160 self.raw = ws 161 S, T = Whitespace.S, Whitespace.T 162 count = [] 163 b = n = nt = 0 164 for ch in self.raw: 165 if ch == S: 166 n = n + 1 167 b = b + 1 168 elif ch == T: 169 n = n + 1 170 nt = nt + 1 171 if b >= len(count): 172 count = count + [0] * (b - len(count) + 1) 173 count[b] = count[b] + 1 174 b = 0 175 else: 176 break 177 self.n = n 178 self.nt = nt 179 self.norm = tuple(count), b 180 self.is_simple = len(count) <= 1 181 182 # return length of longest contiguous run of spaces (whether or not 183 # preceding a tab) 184 def longest_run_of_spaces(self): 185 count, trailing = self.norm 186 return max(len(count)-1, trailing) 187 188 def indent_level(self, tabsize): 189 # count, il = self.norm 190 # for i in range(len(count)): 191 # if count[i]: 192 # il = il + (i//tabsize + 1)*tabsize * count[i] 193 # return il 194 195 # quicker: 196 # il = trailing + sum (i//ts + 1)*ts*count[i] = 197 # trailing + ts * sum (i//ts + 1)*count[i] = 198 # trailing + ts * sum i//ts*count[i] + count[i] = 199 # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] = 200 # trailing + ts * [(sum i//ts*count[i]) + num_tabs] 201 # and note that i//ts*count[i] is 0 when i < ts 202 203 count, trailing = self.norm 204 il = 0 205 for i in range(tabsize, len(count)): 206 il = il + i//tabsize * count[i] 207 return trailing + tabsize * (il + self.nt) 208 209 # return true iff self.indent_level(t) == other.indent_level(t) 210 # for all t >= 1 211 def equal(self, other): 212 return self.norm == other.norm 213 214 # return a list of tuples (ts, i1, i2) such that 215 # i1 == self.indent_level(ts) != other.indent_level(ts) == i2. 216 # Intended to be used after not self.equal(other) is known, in which 217 # case it will return at least one witnessing tab size. 218 def not_equal_witness(self, other): 219 n = max(self.longest_run_of_spaces(), 220 other.longest_run_of_spaces()) + 1 221 a = [] 222 for ts in range(1, n+1): 223 if self.indent_level(ts) != other.indent_level(ts): 224 a.append( (ts, 225 self.indent_level(ts), 226 other.indent_level(ts)) ) 227 return a 228 229 # Return True iff self.indent_level(t) < other.indent_level(t) 230 # for all t >= 1. 231 # The algorithm is due to Vincent Broman. 232 # Easy to prove it's correct. 233 # XXXpost that. 234 # Trivial to prove n is sharp (consider T vs ST). 235 # Unknown whether there's a faster general way. I suspected so at 236 # first, but no longer. 237 # For the special (but common!) case where M and N are both of the 238 # form (T*)(S*), M.less(N) iff M.len() < N.len() and 239 # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded. 240 # XXXwrite that up. 241 # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1. 242 def less(self, other): 243 if self.n >= other.n: 244 return False 245 if self.is_simple and other.is_simple: 246 return self.nt <= other.nt 247 n = max(self.longest_run_of_spaces(), 248 other.longest_run_of_spaces()) + 1 249 # the self.n >= other.n test already did it for ts=1 250 for ts in range(2, n+1): 251 if self.indent_level(ts) >= other.indent_level(ts): 252 return False 253 return True 254 255 # return a list of tuples (ts, i1, i2) such that 256 # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2. 257 # Intended to be used after not self.less(other) is known, in which 258 # case it will return at least one witnessing tab size. 259 def not_less_witness(self, other): 260 n = max(self.longest_run_of_spaces(), 261 other.longest_run_of_spaces()) + 1 262 a = [] 263 for ts in range(1, n+1): 264 if self.indent_level(ts) >= other.indent_level(ts): 265 a.append( (ts, 266 self.indent_level(ts), 267 other.indent_level(ts)) ) 268 return a 269 270def format_witnesses(w): 271 firsts = (str(tup[0]) for tup in w) 272 prefix = "at tab size" 273 if len(w) > 1: 274 prefix = prefix + "s" 275 return prefix + " " + ', '.join(firsts) 276 277def process_tokens(tokens): 278 INDENT = tokenize.INDENT 279 DEDENT = tokenize.DEDENT 280 NEWLINE = tokenize.NEWLINE 281 JUNK = tokenize.COMMENT, tokenize.NL 282 indents = [Whitespace("")] 283 check_equal = 0 284 285 for (type, token, start, end, line) in tokens: 286 if type == NEWLINE: 287 # a program statement, or ENDMARKER, will eventually follow, 288 # after some (possibly empty) run of tokens of the form 289 # (NL | COMMENT)* (INDENT | DEDENT+)? 290 # If an INDENT appears, setting check_equal is wrong, and will 291 # be undone when we see the INDENT. 292 check_equal = 1 293 294 elif type == INDENT: 295 check_equal = 0 296 thisguy = Whitespace(token) 297 if not indents[-1].less(thisguy): 298 witness = indents[-1].not_less_witness(thisguy) 299 msg = "indent not greater e.g. " + format_witnesses(witness) 300 raise NannyNag(start[0], msg, line) 301 indents.append(thisguy) 302 303 elif type == DEDENT: 304 # there's nothing we need to check here! what's important is 305 # that when the run of DEDENTs ends, the indentation of the 306 # program statement (or ENDMARKER) that triggered the run is 307 # equal to what's left at the top of the indents stack 308 309 # Ouch! This assert triggers if the last line of the source 310 # is indented *and* lacks a newline -- then DEDENTs pop out 311 # of thin air. 312 # assert check_equal # else no earlier NEWLINE, or an earlier INDENT 313 check_equal = 1 314 315 del indents[-1] 316 317 elif check_equal and type not in JUNK: 318 # this is the first "real token" following a NEWLINE, so it 319 # must be the first token of the next program statement, or an 320 # ENDMARKER; the "line" argument exposes the leading whitespace 321 # for this statement; in the case of ENDMARKER, line is an empty 322 # string, so will properly match the empty string with which the 323 # "indents" stack was seeded 324 check_equal = 0 325 thisguy = Whitespace(line) 326 if not indents[-1].equal(thisguy): 327 witness = indents[-1].not_equal_witness(thisguy) 328 msg = "indent not equal e.g. " + format_witnesses(witness) 329 raise NannyNag(start[0], msg, line) 330 331 332if __name__ == '__main__': 333 main() 334