#! /usr/bin/env python3

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-v] [ path ... ]

-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
-v (--verbose)  Verbose.   Print informative msgs; else no output.
   (--newline)  Newline.   Specify the newline character to use (CRLF, LF).
                           Default is the same as the original file.
-h (--help)     Help.      Print this usage information and exit.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess spaces and tabs from ends of lines, and remove empty lines
at the end of files. Also ensure the last line ends with a newline.

If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output. In this case, the -d, -r and -v flags are
ignored.

You can pass one or more file and/or directory paths. When a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.

If output is not to standard output, reindent overwrites files in place,
renaming the originals with a .bak extension. If it finds nothing to
change, the file is left alone. If reindent does change a file, the changed
file is a fixed-point for future runs (i.e., running reindent on the
resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.

The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copy(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
"""

__version__ = "1"

import tokenize
import os
import shutil
import sys

verbose = False
recurse = False
dryrun = False
makebackup = True
# A specified newline to be used in the output (set by --newline option)
spec_newline = None


def usage(msg=None):
    """Print msg to stderr; with no msg, print the module usage text."""
    if msg is None:
        msg = __doc__
    print(msg, file=sys.stderr)


def errprint(*args):
    """Write the space-joined str() of args, plus a newline, to stderr."""
    sys.stderr.write(" ".join(str(arg) for arg in args))
    sys.stderr.write("\n")


def main():
    """Parse sys.argv, set the module-level option flags and do the work.

    With no path arguments the script acts as a filter from stdin to
    stdout; otherwise every path argument is handed to check().
    """
    import getopt
    global verbose, recurse, dryrun, makebackup, spec_newline
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drnvh",
            ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"])
    except getopt.error as msg:
        usage(msg)
        return
    for o, a in opts:
        if o in ('-d', '--dryrun'):
            dryrun = True
        elif o in ('-r', '--recurse'):
            recurse = True
        elif o in ('-n', '--nobackup'):
            makebackup = False
        elif o in ('-v', '--verbose'):
            verbose = True
        elif o == '--newline':
            if a.upper() not in ('CRLF', 'LF'):
                usage()
                return
            spec_newline = dict(CRLF='\r\n', LF='\n')[a.upper()]
        elif o in ('-h', '--help'):
            usage()
            return
    if not args:
        # Filter mode: stdin in, stdout out.
        r = Reindenter(sys.stdin)
        r.run()
        r.write(sys.stdout)
        return
    for arg in args:
        check(arg)


def check(file):
    """Reindent one file in place, or walk one directory.

    For a (non-symlink) directory, every entry whose name ends in ".py"
    is checked, and -- when the recurse flag is set -- so is every
    non-hidden, non-symlink subdirectory; None is returned.

    For a file, return True if reindenting changed its content (the file
    is rewritten unless dryrun is set), False if it was already clean,
    and None if it could not be processed (unreadable, undetectable
    encoding, or mixed newlines without --newline).
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    # Both opens live in the same try block so that a missing or
    # unreadable file is reported like any other I/O problem instead of
    # aborting the whole run with a traceback.
    try:
        with open(file, 'rb') as f:
            encoding, _ = tokenize.detect_encoding(f.readline)
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except SyntaxError as se:
        # Raised by detect_encoding() for a bad or conflicting coding cookie.
        errprint("%s: SyntaxError: %s" % (file, str(se)))
        return
    except OSError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    newline = spec_newline if spec_newline else r.newlines
    if isinstance(newline, tuple):
        # The text layer reports a tuple when several newline styles were
        # seen; we cannot pick one on the user's behalf.
        errprint("%s: mixed newlines detected; cannot continue without --newline" % file)
        return

    if r.run():
        if verbose:
            print("changed.")
            if dryrun:
                print("But this is a dry run, so leaving it alone.")
        if not dryrun:
            bak = file + ".bak"
            if makebackup:
                # copyfile() copies the contents only, not permission bits.
                shutil.copyfile(file, bak)
                if verbose:
                    print("backed up", file, "to", bak)
            with open(file, "w", encoding=encoding, newline=newline) as f:
                r.write(f)
            if verbose:
                print("wrote new", file)
        return True
    else:
        if verbose:
            print("unchanged.")
        return False


def _rstrip(line, JUNK='\n \t'):
    """Return line stripped of trailing spaces, tabs, newlines.

    Note that line.rstrip() instead also strips sundry control characters,
    but at least one known Emacs user expects to keep junk like that, not
    mentioning Barry by name or anything <wink>.
    """

    i = len(line)
    while i > 0 and line[i - 1] in JUNK:
        i -= 1
    return line[:i]


class Reindenter:
    """Compute a 4-space-indented version of the source read from a file.

    Construct with an open text file, call run() to do the analysis, then
    write() to emit the result (also available as the list self.after).
    """

    def __init__(self, f):
        """Read all lines from the text file object f and set up state."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

        # Save the newlines found in the file so they can be used to
        #  create output without mutating the newlines.
        self.newlines = f.newlines

    def run(self):
        """Build the reindented program in self.after.

        Return True if the result differs from the raw input lines.
        """
        tokens = tokenize.generate_tokens(self.getline)
        for _token in tokens:
            self.tokeneater(*_token)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + (getlspace(after[jline - 1]) -
                                               getlspace(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the lines produced by run() to the file object f."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned line, or "" once the lines run out."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        Appends (lineno, level) to self.stats for the first token of each
        statement, and (lineno, -1) for each comment seen while a new
        statement is still being looked for.
        """

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))


# Count number of leading blanks.
def getlspace(line):
    """Return the number of leading space characters in line."""
    i, n = 0, len(line)
    while i < n and line[i] == " ":
        i += 1
    return i


if __name__ == '__main__':
    main()