1#! /usr/bin/env python 2 3# This file contains a class and a main program that perform three 4# related (though complimentary) formatting operations on Python 5# programs. When called as "pindent -c", it takes a valid Python 6# program as input and outputs a version augmented with block-closing 7# comments. When called as "pindent -d", it assumes its input is a 8# Python program with block-closing comments and outputs a commentless 9# version. When called as "pindent -r" it assumes its input is a 10# Python program with block-closing comments but with its indentation 11# messed up, and outputs a properly indented version. 12 13# A "block-closing comment" is a comment of the form '# end <keyword>' 14# where <keyword> is the keyword that opened the block. If the 15# opening keyword is 'def' or 'class', the function or class name may 16# be repeated in the block-closing comment as well. Here is an 17# example of a program fully augmented with block-closing comments: 18 19# def foobar(a, b): 20# if a == b: 21# a = a+1 22# elif a < b: 23# b = b-1 24# if b > a: a = a-1 25# # end if 26# else: 27# print 'oops!' 28# # end if 29# # end def foobar 30 31# Note that only the last part of an if...elif...else... block needs a 32# block-closing comment; the same is true for other compound 33# statements (e.g. try...except). Also note that "short-form" blocks 34# like the second 'if' in the example must be closed as well; 35# otherwise the 'else' in the example would be ambiguous (remember 36# that indentation is not significant when interpreting block-closing 37# comments). 38 39# The operations are idempotent (i.e. applied to their own output 40# they yield an identical result). Running first "pindent -c" and 41# then "pindent -r" on a valid Python program produces a program that 42# is semantically identical to the input (though its indentation may 43# be different). Running "pindent -e" on that output produces a 44# program that only differs from the original in indentation. 45 46# Other options: 47# -s stepsize: set the indentation step size (default 8) 48# -t tabsize : set the number of spaces a tab character is worth (default 8) 49# -e : expand TABs into spaces 50# file ... : input file(s) (default standard input) 51# The results always go to standard output 52 53# Caveats: 54# - comments ending in a backslash will be mistaken for continued lines 55# - continuations using backslash are always left unchanged 56# - continuations inside parentheses are not extra indented by -r 57# but must be indented for -c to work correctly (this breaks 58# idempotency!) 59# - continued lines inside triple-quoted strings are totally garbled 60 61# Secret feature: 62# - On input, a block may also be closed with an "end statement" -- 63# this is a block-closing comment without the '#' sign. 64 65# Possible improvements: 66# - check syntax based on transitions in 'next' table 67# - better error reporting 68# - better error recovery 69# - check identifier after class/def 70 71# The following wishes need a more complete tokenization of the source: 72# - Don't get fooled by comments ending in backslash 73# - reindent continuation lines indicated by backslash 74# - handle continuation lines inside parentheses/braces/brackets 75# - handle triple quoted strings spanning lines 76# - realign comments 77# - optionally do much more thorough reformatting, a la C indent 78 79from __future__ import print_function 80 81# Defaults 82STEPSIZE = 8 83TABSIZE = 8 84EXPANDTABS = False 85 86import io 87import re 88import sys 89 90next = {} 91next['if'] = next['elif'] = 'elif', 'else', 'end' 92next['while'] = next['for'] = 'else', 'end' 93next['try'] = 'except', 'finally' 94next['except'] = 'except', 'else', 'finally', 'end' 95next['else'] = next['finally'] = next['with'] = \ 96 next['def'] = next['class'] = 'end' 97next['end'] = () 98start = 'if', 'while', 'for', 'try', 'with', 'def', 'class' 99 100class PythonIndenter: 101 102 def __init__(self, fpi = sys.stdin, fpo = sys.stdout, 103 indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS): 104 self.fpi = fpi 105 self.fpo = fpo 106 self.indentsize = indentsize 107 self.tabsize = tabsize 108 self.lineno = 0 109 self.expandtabs = expandtabs 110 self._write = fpo.write 111 self.kwprog = re.compile( 112 r'^(?:\s|\\\n)*(?P<kw>[a-z]+)' 113 r'((?:\s|\\\n)+(?P<id>[a-zA-Z_]\w*))?' 114 r'[^\w]') 115 self.endprog = re.compile( 116 r'^(?:\s|\\\n)*#?\s*end\s+(?P<kw>[a-z]+)' 117 r'(\s+(?P<id>[a-zA-Z_]\w*))?' 118 r'[^\w]') 119 self.wsprog = re.compile(r'^[ \t]*') 120 # end def __init__ 121 122 def write(self, line): 123 if self.expandtabs: 124 self._write(line.expandtabs(self.tabsize)) 125 else: 126 self._write(line) 127 # end if 128 # end def write 129 130 def readline(self): 131 line = self.fpi.readline() 132 if line: self.lineno += 1 133 # end if 134 return line 135 # end def readline 136 137 def error(self, fmt, *args): 138 if args: fmt = fmt % args 139 # end if 140 sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt)) 141 self.write('### %s ###\n' % fmt) 142 # end def error 143 144 def getline(self): 145 line = self.readline() 146 while line[-2:] == '\\\n': 147 line2 = self.readline() 148 if not line2: break 149 # end if 150 line += line2 151 # end while 152 return line 153 # end def getline 154 155 def putline(self, line, indent): 156 tabs, spaces = divmod(indent*self.indentsize, self.tabsize) 157 i = self.wsprog.match(line).end() 158 line = line[i:] 159 if line[:1] not in ('\n', '\r', ''): 160 line = '\t'*tabs + ' '*spaces + line 161 # end if 162 self.write(line) 163 # end def putline 164 165 def reformat(self): 166 stack = [] 167 while True: 168 line = self.getline() 169 if not line: break # EOF 170 # end if 171 m = self.endprog.match(line) 172 if m: 173 kw = 'end' 174 kw2 = m.group('kw') 175 if not stack: 176 self.error('unexpected end') 177 elif stack.pop()[0] != kw2: 178 self.error('unmatched end') 179 # end if 180 self.putline(line, len(stack)) 181 continue 182 # end if 183 m = self.kwprog.match(line) 184 if m: 185 kw = m.group('kw') 186 if kw in start: 187 self.putline(line, len(stack)) 188 stack.append((kw, kw)) 189 continue 190 # end if 191 if next.has_key(kw) and stack: 192 self.putline(line, len(stack)-1) 193 kwa, kwb = stack[-1] 194 stack[-1] = kwa, kw 195 continue 196 # end if 197 # end if 198 self.putline(line, len(stack)) 199 # end while 200 if stack: 201 self.error('unterminated keywords') 202 for kwa, kwb in stack: 203 self.write('\t%s\n' % kwa) 204 # end for 205 # end if 206 # end def reformat 207 208 def delete(self): 209 begin_counter = 0 210 end_counter = 0 211 while True: 212 line = self.getline() 213 if not line: break # EOF 214 # end if 215 m = self.endprog.match(line) 216 if m: 217 end_counter += 1 218 continue 219 # end if 220 m = self.kwprog.match(line) 221 if m: 222 kw = m.group('kw') 223 if kw in start: 224 begin_counter += 1 225 # end if 226 # end if 227 self.write(line) 228 # end while 229 if begin_counter - end_counter < 0: 230 sys.stderr.write('Warning: input contained more end tags than expected\n') 231 elif begin_counter - end_counter > 0: 232 sys.stderr.write('Warning: input contained less end tags than expected\n') 233 # end if 234 # end def delete 235 236 def complete(self): 237 stack = [] 238 todo = [] 239 currentws = thisid = firstkw = lastkw = topid = '' 240 while True: 241 line = self.getline() 242 i = self.wsprog.match(line).end() 243 m = self.endprog.match(line) 244 if m: 245 thiskw = 'end' 246 endkw = m.group('kw') 247 thisid = m.group('id') 248 else: 249 m = self.kwprog.match(line) 250 if m: 251 thiskw = m.group('kw') 252 if not next.has_key(thiskw): 253 thiskw = '' 254 # end if 255 if thiskw in ('def', 'class'): 256 thisid = m.group('id') 257 else: 258 thisid = '' 259 # end if 260 elif line[i:i+1] in ('\n', '#'): 261 todo.append(line) 262 continue 263 else: 264 thiskw = '' 265 # end if 266 # end if 267 indentws = line[:i] 268 indent = len(indentws.expandtabs(self.tabsize)) 269 current = len(currentws.expandtabs(self.tabsize)) 270 while indent < current: 271 if firstkw: 272 if topid: 273 s = '# end %s %s\n' % ( 274 firstkw, topid) 275 else: 276 s = '# end %s\n' % firstkw 277 # end if 278 self.write(currentws + s) 279 firstkw = lastkw = '' 280 # end if 281 currentws, firstkw, lastkw, topid = stack.pop() 282 current = len(currentws.expandtabs(self.tabsize)) 283 # end while 284 if indent == current and firstkw: 285 if thiskw == 'end': 286 if endkw != firstkw: 287 self.error('mismatched end') 288 # end if 289 firstkw = lastkw = '' 290 elif not thiskw or thiskw in start: 291 if topid: 292 s = '# end %s %s\n' % ( 293 firstkw, topid) 294 else: 295 s = '# end %s\n' % firstkw 296 # end if 297 self.write(currentws + s) 298 firstkw = lastkw = topid = '' 299 # end if 300 # end if 301 if indent > current: 302 stack.append((currentws, firstkw, lastkw, topid)) 303 if thiskw and thiskw not in start: 304 # error 305 thiskw = '' 306 # end if 307 currentws, firstkw, lastkw, topid = \ 308 indentws, thiskw, thiskw, thisid 309 # end if 310 if thiskw: 311 if thiskw in start: 312 firstkw = lastkw = thiskw 313 topid = thisid 314 else: 315 lastkw = thiskw 316 # end if 317 # end if 318 for l in todo: self.write(l) 319 # end for 320 todo = [] 321 if not line: break 322 # end if 323 self.write(line) 324 # end while 325 # end def complete 326# end class PythonIndenter 327 328# Simplified user interface 329# - xxx_filter(input, output): read and write file objects 330# - xxx_string(s): take and return string object 331# - xxx_file(filename): process file in place, return true iff changed 332 333def complete_filter(input = sys.stdin, output = sys.stdout, 334 stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS): 335 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs) 336 pi.complete() 337# end def complete_filter 338 339def delete_filter(input= sys.stdin, output = sys.stdout, 340 stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS): 341 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs) 342 pi.delete() 343# end def delete_filter 344 345def reformat_filter(input = sys.stdin, output = sys.stdout, 346 stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS): 347 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs) 348 pi.reformat() 349# end def reformat_filter 350 351def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS): 352 input = io.BytesIO(source) 353 output = io.BytesIO() 354 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs) 355 pi.complete() 356 return output.getvalue() 357# end def complete_string 358 359def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS): 360 input = io.BytesIO(source) 361 output = io.BytesIO() 362 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs) 363 pi.delete() 364 return output.getvalue() 365# end def delete_string 366 367def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS): 368 input = io.BytesIO(source) 369 output = io.BytesIO() 370 pi = PythonIndenter(input, output, stepsize, tabsize, expandtabs) 371 pi.reformat() 372 return output.getvalue() 373# end def reformat_string 374 375def make_backup(filename): 376 import os, os.path 377 backup = filename + '~' 378 if os.path.lexists(backup): 379 try: 380 os.remove(backup) 381 except os.error: 382 print("Can't remove backup %r" % (backup,), file=sys.stderr) 383 # end try 384 # end if 385 try: 386 os.rename(filename, backup) 387 except os.error: 388 print("Can't rename %r to %r" % (filename, backup), file=sys.stderr) 389 # end try 390# end def make_backup 391 392def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS): 393 with open(filename, 'r') as f: 394 source = f.read() 395 # end with 396 result = complete_string(source, stepsize, tabsize, expandtabs) 397 if source == result: return 0 398 # end if 399 make_backup(filename) 400 with open(filename, 'w') as f: 401 f.write(result) 402 # end with 403 return 1 404# end def complete_file 405 406def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS): 407 with open(filename, 'r') as f: 408 source = f.read() 409 # end with 410 result = delete_string(source, stepsize, tabsize, expandtabs) 411 if source == result: return 0 412 # end if 413 make_backup(filename) 414 with open(filename, 'w') as f: 415 f.write(result) 416 # end with 417 return 1 418# end def delete_file 419 420def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS): 421 with open(filename, 'r') as f: 422 source = f.read() 423 # end with 424 result = reformat_string(source, stepsize, tabsize, expandtabs) 425 if source == result: return 0 426 # end if 427 make_backup(filename) 428 with open(filename, 'w') as f: 429 f.write(result) 430 # end with 431 return 1 432# end def reformat_file 433 434# Test program when called as a script 435 436usage = """ 437usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ... 438-c : complete a correctly indented program (add #end directives) 439-d : delete #end directives 440-r : reformat a completed program (use #end directives) 441-s stepsize: indentation step (default %(STEPSIZE)d) 442-t tabsize : the worth in spaces of a tab (default %(TABSIZE)d) 443-e : expand TABs into spaces (default OFF) 444[file] ... : files are changed in place, with backups in file~ 445If no files are specified or a single - is given, 446the program acts as a filter (reads stdin, writes stdout). 447""" % vars() 448 449def error_both(op1, op2): 450 sys.stderr.write('Error: You can not specify both '+op1+' and -'+op2[0]+' at the same time\n') 451 sys.stderr.write(usage) 452 sys.exit(2) 453# end def error_both 454 455def test(): 456 import getopt 457 try: 458 opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e') 459 except getopt.error, msg: 460 sys.stderr.write('Error: %s\n' % msg) 461 sys.stderr.write(usage) 462 sys.exit(2) 463 # end try 464 action = None 465 stepsize = STEPSIZE 466 tabsize = TABSIZE 467 expandtabs = EXPANDTABS 468 for o, a in opts: 469 if o == '-c': 470 if action: error_both(o, action) 471 # end if 472 action = 'complete' 473 elif o == '-d': 474 if action: error_both(o, action) 475 # end if 476 action = 'delete' 477 elif o == '-r': 478 if action: error_both(o, action) 479 # end if 480 action = 'reformat' 481 elif o == '-s': 482 stepsize = int(a) 483 elif o == '-t': 484 tabsize = int(a) 485 elif o == '-e': 486 expandtabs = True 487 # end if 488 # end for 489 if not action: 490 sys.stderr.write( 491 'You must specify -c(omplete), -d(elete) or -r(eformat)\n') 492 sys.stderr.write(usage) 493 sys.exit(2) 494 # end if 495 if not args or args == ['-']: 496 action = eval(action + '_filter') 497 action(sys.stdin, sys.stdout, stepsize, tabsize, expandtabs) 498 else: 499 action = eval(action + '_file') 500 for filename in args: 501 action(filename, stepsize, tabsize, expandtabs) 502 # end for 503 # end if 504# end def test 505 506if __name__ == '__main__': 507 test() 508# end if 509