#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-n][-v][-h] [ path ... ]

-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
-h (--help)     Help.      Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output. In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths. For a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension. If it finds nothing to
change, the file is left alone. If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
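
Example invocations (the file and directory names are only illustrative):

    reindent -r -v Lib            # reindent Lib and its subdirectories, verbosely
    reindent -d -v foo.py         # dry run: report whether foo.py would change
    reindent < old.py > new.py    # act as a filter from stdin to stdout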
40""" 41 42__version__ = "1" 43 44import tokenize 45import os, shutil 46import sys 47import io 48 49verbose = 0 50recurse = 0 51dryrun = 0 52makebackup = True 53 54def usage(msg=None): 55 if msg is not None: 56 print >> sys.stderr, msg 57 print >> sys.stderr, __doc__ 58 59def errprint(*args): 60 sep = "" 61 for arg in args: 62 sys.stderr.write(sep + str(arg)) 63 sep = " " 64 sys.stderr.write("\n") 65 66def main(): 67 import getopt 68 global verbose, recurse, dryrun, makebackup 69 try: 70 opts, args = getopt.getopt(sys.argv[1:], "drnvh", 71 ["dryrun", "recurse", "nobackup", "verbose", "help"]) 72 except getopt.error, msg: 73 usage(msg) 74 return 75 for o, a in opts: 76 if o in ('-d', '--dryrun'): 77 dryrun += 1 78 elif o in ('-r', '--recurse'): 79 recurse += 1 80 elif o in ('-n', '--nobackup'): 81 makebackup = False 82 elif o in ('-v', '--verbose'): 83 verbose += 1 84 elif o in ('-h', '--help'): 85 usage() 86 return 87 if not args: 88 r = Reindenter(sys.stdin) 89 r.run() 90 r.write(sys.stdout) 91 return 92 for arg in args: 93 check(arg) 94 95def check(file): 96 if os.path.isdir(file) and not os.path.islink(file): 97 if verbose: 98 print "listing directory", file 99 names = os.listdir(file) 100 for name in names: 101 fullname = os.path.join(file, name) 102 if ((recurse and os.path.isdir(fullname) and 103 not os.path.islink(fullname) and 104 not os.path.split(fullname)[1].startswith(".")) 105 or name.lower().endswith(".py")): 106 check(fullname) 107 return 108 109 if verbose: 110 print "checking", file, "...", 111 try: 112 f = open(file, "rb") 113 except IOError, msg: 114 errprint("%s: I/O Error: %s" % (file, str(msg))) 115 return 116 117 r = Reindenter(f) 118 f.close() 119 120 newline = r.newlines 121 if isinstance(newline, tuple): 122 errprint("%s: mixed newlines detected; cannot process file" % file) 123 return 124 125 if r.run(): 126 if verbose: 127 print "changed." 128 if dryrun: 129 print "But this is a dry run, so leaving it alone." 130 if not dryrun: 131 bak = file + ".bak" 132 if makebackup: 133 shutil.copyfile(file, bak) 134 if verbose: 135 print "backed up", file, "to", bak 136 f = open(file, "wb") 137 r.write(f) 138 f.close() 139 if verbose: 140 print "wrote new", file 141 return True 142 else: 143 if verbose: 144 print "unchanged." 145 return False 146 147def _detect_newlines(lines): 148 newlines = {'\r\n' if line[-2:] == '\r\n' else 149 '\n' if line[-1:] == '\n' else 150 '\r' if line[-1:] == '\r' else 151 '' 152 for line in lines} 153 newlines.discard('') 154 newlines = tuple(sorted(newlines)) 155 if not newlines: 156 return '\n' 157 if len(newlines) == 1: 158 return newlines[0] 159 return newlines 160 161def _rstrip(line, JUNK='\r\n \t'): 162 """Return line stripped of trailing spaces, tabs, newlines. 163 164 Note that line.rstrip() instead also strips sundry control characters, 165 but at least one known Emacs user expects to keep junk like that, not 166 mentioning Barry by name or anything <wink>. 167 """ 168 169 i = len(line) 170 while i > 0 and line[i-1] in JUNK: 171 i -= 1 172 return line[:i] 173 174class Reindenter: 175 176 def __init__(self, f): 177 self.find_stmt = 1 # next token begins a fresh stmt? 178 self.level = 0 # current indent level 179 180 # Raw file lines. 181 self.raw = f.readlines() 182 183 # Save the newlines found in the file so they can be used to 184 # create output without mutating the newlines. 
        self.newlines = _detect_newlines(self.raw)
        if isinstance(self.newlines, tuple):
            self.newline = self.newlines[0]
        else:
            self.newline = self.newlines

        # File lines, rstripped & tab-expanded. Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's newline.
        self.lines = [_rstrip(line).expandtabs() + self.newline
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line. indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        tokenize.tokenize(self.getline, self.tokeneater)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == self.newline:
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line. If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in xrange(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in xrange(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == self.newline:
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, (sline, scol), end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))

# Count number of leading blanks.
def getlspace(line):
    i, n = 0, len(line)
    while i < n and line[i] == " ":
        i += 1
    return i

if __name__ == '__main__':
    main()
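
# Illustrative sketch: Reindenter can also be driven programmatically on an
# in-memory file object; the sample source string below is made up for the
# example.
#
#     import io
#     src = io.BytesIO("if 1:\n\tprint 'hello'\n")
#     r = Reindenter(src)
#     if r.run():               # True when the reindented text differs
#         r.write(sys.stdout)   # emits the 4-space, tab-free version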