#!/usr/bin/env python3
'''Add syntax highlighting to Python source code'''

__author__ = 'Raymond Hettinger'

import builtins
import functools
import html as html_module
import keyword
import re
import tokenize

#### Analyze Python Source #################################

# Token types that carry pieces of an f-string / t-string literal.  They only
# exist on interpreters that tokenize those literals piecewise (f-strings
# since Python 3.12); on older versions the whole literal is a single STRING
# token and this set is simply empty.
_STRING_PART_TYPES = frozenset(
    getattr(tokenize, name)
    for name in ('FSTRING_START', 'FSTRING_MIDDLE', 'FSTRING_END',
                 'TSTRING_START', 'TSTRING_MIDDLE', 'TSTRING_END')
    if hasattr(tokenize, name)
)

def is_builtin(s):
    'Return True if s is the name of a builtin'
    return hasattr(builtins, s)

def combine_range(lines, start, end):
    '''Join content from a range of lines between start and end.

    *lines* is a list of source lines (with line endings); *start* and *end*
    are (row, col) pairs with 1-based rows and 0-based columns, as produced
    by tokenize.  Returns a (text, end) tuple so the caller can advance its
    "already written" position in the same assignment.
    '''
    (srow, scol), (erow, ecol) = start, end
    if srow == erow:
        return lines[srow-1][scol:ecol], end
    rows = [lines[srow-1][scol:]] + lines[srow: erow-1] + [lines[erow-1][:ecol]]
    return ''.join(rows), end

def analyze_python(source):
    '''Generate and classify chunks of Python for syntax highlighting.
       Yields tuples in the form: (category, categorized_text).

       The category is one of 'comment', 'operator', 'string', 'docstring',
       'definition', 'defname', 'keyword', 'builtin', or '' for plain text.
       Every chunk is sliced from *source* itself, so concatenating the
       yielded texts reproduces *source* exactly.  Errors raised by tokenize
       on malformed input are not caught here.
    '''
    lines = source.splitlines(True)
    # Sentinel row: the tokenizer reports its final position on the row after
    # the last real line, and combine_range() must be able to index it.
    lines.append('')
    readline = functools.partial(next, iter(lines), '')
    tok_type, tok_str = tokenize.COMMENT, ''
    written = (1, 0)                # position up to which text has been yielded
    for tok in tokenize.generate_tokens(readline):
        prev_tok_type, prev_tok_str = tok_type, tok_str
        tok_type, tok_str, start, end, _line = tok
        kind = ''
        if tok_type == tokenize.COMMENT:
            kind = 'comment'
        elif tok_type == tokenize.OP and tok_str[:1] not in '{}[](),.:;@':
            kind = 'operator'
        elif tok_type == tokenize.STRING:
            kind = 'string'
            # Heuristic: a string that opens an indented block or starts in
            # column 0 is treated as a docstring.
            if prev_tok_type == tokenize.INDENT or start[1] == 0:
                kind = 'docstring'
        elif tok_type in _STRING_PART_TYPES:
            # Literal part of an f-string; the replacement fields in between
            # arrive as ordinary tokens and are classified on their own.
            kind = 'string'
        elif tok_type == tokenize.NAME:
            if tok_str in ('def', 'class', 'import', 'from'):
                kind = 'definition'
            elif prev_tok_str in ('def', 'class'):
                kind = 'defname'
            elif keyword.iskeyword(tok_str):
                kind = 'keyword'
            elif is_builtin(tok_str) and prev_tok_str != '.':
                kind = 'builtin'
        if kind and tok_str:
            # Flush the unclassified text that precedes this token ...
            text, written = combine_range(lines, written, start)
            yield '', text
            # ... then the token itself, taken from the source span so the
            # output stays a lossless partition of the input.
            text, written = combine_range(lines, start, end)
            yield kind, text
    # Flush whatever follows the last classified token, up to the sentinel row.
    text, written = combine_range(lines, written, (len(lines), 0))
    yield '', text

#### Raw Output  ###########################################

def raw_highlight(classified_text):
    'Straight text display of text classifications'
    result = []
    for kind, text in classified_text:
        result.append('%15s:  %r\n' % (kind or 'plain', text))
    return ''.join(result)

#### ANSI Output ###########################################

default_ansi = {
    'comment': ('\033[0;31m', '\033[0m'),
    'string': ('\033[0;32m', '\033[0m'),
    'docstring': ('\033[0;32m', '\033[0m'),
    'keyword': ('\033[0;33m', '\033[0m'),
    'builtin': ('\033[0;35m', '\033[0m'),
    'definition': ('\033[0;33m', '\033[0m'),
    'defname': ('\033[0;34m', '\033[0m'),
    'operator': ('\033[0;33m', '\033[0m'),
}

def ansi_highlight(classified_text, colors=None):
    '''Add syntax highlighting to source code using ANSI escape sequences.

    *colors* maps a category to an (opener, closer) pair of escape
    sequences; categories missing from the mapping are emitted unchanged.
    When omitted (None), default_ansi is used.
    '''
    # http://en.wikipedia.org/wiki/ANSI_escape_code
    if colors is None:
        colors = default_ansi
    result = []
    for kind, text in classified_text:
        opener, closer = colors.get(kind, ('', ''))
        result += [opener, text, closer]
    return ''.join(result)

#### HTML Output ###########################################

def html_highlight(classified_text,opener='<pre class="python">\n', closer='</pre>\n'):
    '''Convert classified text to an HTML fragment.

    Each categorized chunk is wrapped in <span class="CATEGORY">; all text
    is HTML-escaped.  *opener* and *closer* bracket the whole fragment.
    '''
    result = [opener]
    for kind, text in classified_text:
        if kind:
            result.append('<span class="%s">' % kind)
        result.append(html_module.escape(text))
        if kind:
            result.append('</span>')
    result.append(closer)
    return ''.join(result)

default_css = {
    '.comment': '{color: crimson;}',
    '.string':  '{color: forestgreen;}',
    '.docstring': '{color: forestgreen; font-style:italic;}',
    '.keyword': '{color: darkorange;}',
    '.builtin': '{color: purple;}',
    '.definition': '{color: darkorange; font-weight:bold;}',
    '.defname': '{color: blue;}',
    '.operator': '{color: brown;}',
}

default_html = '''\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
          "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8">
<title> {title} </title>
<style type="text/css">
{css}
</style>
</head>
<body>
{body}
</body>
</html>
'''

def build_html_page(classified_text, title='python',
                    css=None, html=default_html):
    '''Create a complete HTML page with colorized source code.

    *css* maps a CSS selector to its declaration block; when omitted (None)
    the module-level default_css is used.  *html* is a str.format template
    with {title}, {css} and {body} fields.  The title is HTML-escaped.
    '''
    if css is None:
        css = default_css
    css_str = '\n'.join(['%s %s' % item for item in css.items()])
    result = html_highlight(classified_text)
    title = html_module.escape(title)
    return html.format(title=title, css=css_str, body=result)

#### LaTeX Output ##########################################

default_latex_commands = {
    'comment': r'{\color{red}#1}',
    'string': r'{\color{ForestGreen}#1}',
    'docstring': r'{\emph{\color{ForestGreen}#1}}',
    'keyword': r'{\color{orange}#1}',
    'builtin': r'{\color{purple}#1}',
    'definition': r'{\color{orange}#1}',
    'defname': r'{\color{blue}#1}',
    'operator': r'{\color{brown}#1}',
}

default_latex_document = r'''
\documentclass{article}
\usepackage{alltt}
\usepackage{upquote}
\usepackage{color}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage[cm]{fullpage}
%(macros)s
\begin{document}
\center{\LARGE{%(title)s}}
\begin{alltt}
%(body)s
\end{alltt}
\end{document}
'''

# Replacements for the characters LaTeX treats specially in ordinary text
# (outside alltt), used for the document title.
_LATEX_TEXT_ESCAPES = {
    '\\': r'\textbackslash{}',
    '{': r'\{',
    '}': r'\}',
    '$': r'\$',
    '&': r'\&',
    '#': r'\#',
    '_': r'\_',
    '%': r'\%',
    '^': r'\textasciicircum{}',
    '~': r'\textasciitilde{}',
}

def _latex_text_escape(s):
    'Escape the characters that are special in normal LaTeX text mode'
    return re.sub(r'[\\{}$&#_%^~]',
                  lambda mo: _LATEX_TEXT_ESCAPES[mo.group()], s)

def alltt_escape(s):
    'Replace backslash and braces with their escaped equivalents'
    xlat = {'{': r'\{', '}': r'\}', '\\': r'\textbackslash{}'}
    return re.sub(r'[\\{}]', lambda mo: xlat[mo.group()], s)

def latex_highlight(classified_text, title = 'python',
                    commands = None,
                    document = default_latex_document):
    '''Create a complete LaTeX document with colorized source code.

    *commands* maps a category to the body of the \\py<category> macro
    generated for it; when omitted (None) default_latex_commands is used.
    *document* is a %-format template with %(macros)s, %(title)s and
    %(body)s fields.  The title is escaped for LaTeX text mode (so file
    names containing "_" or "\\" are safe); the body is escaped for alltt.
    '''
    if commands is None:
        commands = default_latex_commands
    macros = '\n'.join(r'\newcommand{\py%s}[1]{%s}' % c for c in commands.items())
    result = []
    for kind, text in classified_text:
        if kind:
            result.append(r'\py%s{' % kind)
        result.append(alltt_escape(text))
        if kind:
            result.append('}')
    # Format the template the caller supplied, not the module default.
    return document % dict(title=_latex_text_escape(title), macros=macros,
                           body=''.join(result))


def main(argv=None):
    '''Command-line entry point.

    *argv* is the list of arguments to parse; None means sys.argv[1:].
    Depending on the options, the highlighted result is written to stdout
    or saved as <sourcename>.html in the current directory and opened in a
    web browser.
    '''
    import argparse
    import os.path
    import sys
    import textwrap
    import webbrowser
    from pathlib import Path

    parser = argparse.ArgumentParser(
            description = 'Add syntax highlighting to Python source code',
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog = textwrap.dedent('''
                examples:

                  # Show syntax highlighted code in the terminal window
                  $ ./highlight.py myfile.py

                  # Colorize myfile.py and display in a browser
                  $ ./highlight.py -b myfile.py

                  # Create an HTML section to embed in an existing webpage
                  ./highlight.py -s myfile.py

                  # Create a complete HTML file
                  $ ./highlight.py -c myfile.py > myfile.html

                  # Create a PDF using LaTeX
                  $ ./highlight.py -l myfile.py | pdflatex

            '''))
    parser.add_argument('sourcefile', metavar = 'SOURCEFILE',
            help = 'file containing Python sourcecode')
    parser.add_argument('-b', '--browser', action = 'store_true',
            help = 'launch a browser to show results')
    parser.add_argument('-c', '--complete', action = 'store_true',
            help = 'build a complete html webpage')
    parser.add_argument('-l', '--latex', action = 'store_true',
            help = 'build a LaTeX document')
    parser.add_argument('-r', '--raw', action = 'store_true',
            help = 'raw parse of categorized text')
    parser.add_argument('-s', '--section', action = 'store_true',
            help = 'show an HTML section rather than a complete webpage')
    args = parser.parse_args(argv)

    if args.section and (args.browser or args.complete):
        parser.error('The -s/--section option is incompatible with '
                     'the -b/--browser or -c/--complete options')

    sourcefile = args.sourcefile
    # tokenize.open() decodes using the file's coding cookie or BOM
    # (UTF-8 otherwise) instead of the locale's preferred encoding.
    with tokenize.open(sourcefile) as f:
        source = f.read()
    classified_text = analyze_python(source)

    if args.raw:
        encoded = raw_highlight(classified_text)
    elif args.complete or args.browser:
        encoded = build_html_page(classified_text, title=sourcefile)
    elif args.section:
        encoded = html_highlight(classified_text)
    elif args.latex:
        encoded = latex_highlight(classified_text, title=sourcefile)
    else:
        encoded = ansi_highlight(classified_text)

    if args.browser:
        htmlfile = os.path.splitext(os.path.basename(sourcefile))[0] + '.html'
        # The generated page declares charset=UTF-8, so write it as UTF-8.
        with open(htmlfile, 'w', encoding='utf-8') as f:
            f.write(encoded)
        # as_uri() builds a well-formed, percent-encoded file: URL on every
        # platform (plain concatenation is malformed for Windows paths).
        webbrowser.open(Path(os.path.abspath(htmlfile)).as_uri())
    else:
        sys.stdout.write(encoded)


if __name__ == '__main__':
    main()