""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents.

   Written by Raymond D. Hettinger <python at rcn.com>
   Copyright (c) 2003 Python Software Foundation.  All rights reserved.

Designed to catch common markup errors including:
* Unbalanced or mismatched parenthesis, brackets, and braces.
* Unbalanced or mismatched \\begin and \\end blocks.
* Misspelled or invalid LaTeX commands.
* Use of forward slashes instead of backslashes for commands.
* Table line size mismatches.

Sample command line usage:
    python texcheck.py -k chapterheading -m lib/librandomtex *.tex

Options:
    -m          Munge parenthesis and brackets. [0,n) would normally mismatch.
    -k keyword: Keyword is a valid LaTeX command. Do not include the backslash.
    -d:         Delimiter check only (useful for non-LaTeX files).
    -h:         Help
    -s lineno:  Start at lineno (useful for skipping complex sections).
    -v:         Verbose. Trace the matching of \\begin and \\end blocks.
"""

import re
import sys
import getopt
from itertools import count, islice
try:
    from itertools import izip
except ImportError:
    # Python 3 has no izip; its builtin zip is already lazy.
    izip = zip
import glob

# Whitespace-separated list of LaTeX commands accepted as valid by checkit().
cmdstr = r"""
    \section \module \declaremodule \modulesynopsis \moduleauthor
    \sectionauthor \versionadded \code \class \method \begin
    \optional \var \ref \end \subsection \lineiii \hline \label
    \indexii \textrm \ldots \keyword \stindex \index \item \note
    \withsubitem \ttindex \footnote \citetitle \samp \opindex
    \noindent \exception \strong \dfn \ctype \obindex \character
    \indexiii \function \bifuncindex \refmodule \refbimodindex
    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
    \regexp \program \production \token \productioncont \term
    \grammartoken \lineii \seemodule \file \EOF \documentclass
    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
    \tableofcontents \kbd \programopt \envvar \refstmodindex
    \cfunction \constant \NULL \moreargs \cfuncline \cdata
    \textasciicircum \n \ABC \setindexsubitem \versionchanged
    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
    \verbatiminput \methodline \textgreater \seetitle \lineiv
    \funclineni \ulink \manpage \funcline \dataline \unspecified
    \textbackslash \mimetype \mailheader \seepep \textunderscore
    \longprogramopt \infinity \plusminus \shortversion \version
    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
    \indexname \appendix \protect \indexiv \mbox \textasciitilde
    \platform \seeurl \leftmargin \labelwidth \localmoduletable
    \LaTeX \copyright \memberline \backslash \pi \centerline
    \caption \vspace \textwidth \menuselection \textless
    \makevar \csimplemacro \menuselection \bfcode \sub \release
    \email \kwindex \refexmodindex \filenq \e \menuselection
    \exindex \linev \newsgroup \verbatim \setshortversion
    \author \authoraddress \paragraph \subparagraph \cmemberline
    \textbar \C \seelink
"""


def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that closing delimiter matches most recent opening delimiter.

    c_lineno, c_symbol -- line number and text of the closing delimiter.
    openers -- stack (list) of (lineno, symbol) pairs; its top entry is
        popped by this call.
    pairmap -- maps a closing symbol to the collection of opening symbols
        allowed to pair with it.  A closer missing from the map (such as an
        \\end block name) must equal its opener exactly.

    Prints a diagnostic on mismatch or on an empty stack.  Returns None.
    """
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        print("\nDelimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol))
        return
    if o_symbol in pairmap.get(c_symbol, [c_symbol]):
        return
    print("\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno))
    return


def checkit(source, opts, morecmds=()):
    """Check the LaTeX formatting in a sequence of lines.

    Source is an iterable of text lines (an open file works).

    Opts is a mapping of options to option values if any:
        -m          munge parenthesis and brackets
        -d          delimiters only checking
        -v          verbose trace of delimiter matching
        -s lineno:  linenumber to start scan (default is 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    All findings are printed to standard output.  Always returns 0.
    """

    texcmd = re.compile(r'\\[A-Za-z]+')
    falsetexcmd = re.compile(r'\/([A-Za-z]+)')  # Mismarked with forward slash

    validcmds = set(cmdstr.split())
    for cmd in morecmds:
        validcmds.add('\\' + cmd)

    if '-m' in opts:
        pairmap = {']': '[(', ')': '(['}    # Munged openers
    else:
        pairmap = {']': '[', ')': '('}      # Normal opener for a given closer
    openpunct = set('([')                   # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
    braces = re.compile(r'({)|(})')
    # The class must be A-Za-z: the range A-z would also admit [ \ ] ^ _ `
    # and report things like "a^b a^b" as doubled words.
    doubledwords = re.compile(r'(\b[A-Za-z]+\b) \b\1\b')
    spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s')

    openers = []                            # Stack of pending open delimiters
    bracestack = []                         # Stack of pending open braces

    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
    tableline = re.compile(r'\\line([iv]+){')
    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
    tablelevel = ''
    tablestartline = 0

    startline = int(opts.get('-s', '1'))
    lineno = 0      # Stays 0 when there is nothing to scan

    for lineno, line in izip(count(startline), islice(source, startline-1, None)):
        line = line.rstrip()

        # Check balancing of open/close parenthesis, brackets, and begin/end blocks
        for begend, name, punct in delimiters.findall(line):
            if '-v' in opts:
                # No newline here: the stack trace below finishes the line.
                sys.stdout.write('%s | %s %s %s ' % (lineno, begend, name, punct))
            if begend == 'begin' and '-d' not in opts:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and '-d' not in opts:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if '-v' in opts:
                print(' -->  %s' % (openers,))

        # Balance opening and closing braces
        for opening, closing in braces.findall(line):
            if opening == '{':
                bracestack.append(lineno)
            if closing == '}':
                try:
                    bracestack.pop()
                except IndexError:
                    print(r'Warning, unmatched } on line %s.' % (lineno,))

        # Optionally, skip LaTeX specific checks
        if '-d' in opts:
            continue

        # Warn whenever forward slashes encountered with a LaTeX command
        for cmd in falsetexcmd.findall(line):
            if '822' in line or '.html' in line:
                continue    # Ignore false positives for urls and for /rfc822
            if '\\' + cmd in validcmds:
                print('Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd))

        # Check for markup requiring {} for correct spacing
        for cmd in spacingmarkup.findall(line):
            print(r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno))

        # Validate commands
        nc = line.find(r'\newcommand')
        if nc != -1:
            start = line.find('{', nc)
            end = line.find('}', start)
            # Register the new command only when a {...} group is present;
            # otherwise the slice would add unrelated text to validcmds.
            if start != -1 and end != -1:
                validcmds.add(line[start+1:end])
        for cmd in texcmd.findall(line):
            if cmd not in validcmds:
                # cmd already carries its leading backslash.
                print('Warning, unknown tex cmd on line %d: %s' % (lineno, cmd))

        # Check table levels (make sure lineii only inside tableii)
        m = tablestart.search(line)
        if m:
            tablelevel = m.group(1)
            tablestartline = lineno
        m = tableline.search(line)
        if m and m.group(1) != tablelevel:
            print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline))
        if tableend.search(line):
            tablelevel = ''

        # Style guide warnings
        if 'e.g.' in line or 'i.e.' in line:
            print(r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,))

        for dw in doubledwords.findall(line):
            print(r'Doubled word warning. "%s" on line %d' % (dw, lineno))

    lastline = lineno
    for o_lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, o_lineno))
    for o_lineno in bracestack:
        print("Unmatched { on line %d" % (o_lineno,))
    print('Done checking %d lines.' % (lastline,))
    return 0


def main(args=None):
    """Command line entry point.

    Args is the list of command line arguments (defaults to sys.argv[1:]).

    Returns 0 after showing help or after checking every file, 1 when no
    file was given or no wildcard matched anything, and 2 as soon as a file
    cannot be opened.
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mdhs:v")
    opts = dict(optitems)
    if '-h' in opts or not args:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # Expand wildcards into a fresh list.  Replacing slices of arglist while
    # iterating over it skipped the spec that followed an unmatched pattern.
    filenames = []
    for filespec in arglist:
        if '*' in filespec or '?' in filespec:
            filenames.extend(glob.glob(filespec))
        else:
            filenames.append(filespec)

    if not filenames:
        print('No files matched the given file specifications')
        return 1

    morecmds = [v for k, v in optitems if k == '-k']
    err = []

    for filename in filenames:
        print('=' * 30)
        print("Checking %s" % (filename,))
        try:
            f = open(filename)
        except IOError:
            print('Cannot open file %s.' % (filename,))
            return 2

        try:
            err.append(checkit(f, opts, morecmds))
        finally:
            f.close()

    return max(err)


if __name__ == '__main__':
    sys.exit(main())