#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Check for stylistic and formal issues in .rst and .py
# files included in the documentation.
#
# 01/2009, Georg Brandl

# TODO: - wrong versions in versionadded/changed
#       - wrong markup after versionchanged directive

import os
import re
import sys
import getopt
from string import ascii_letters
from os.path import join, splitext, abspath, exists
from collections import defaultdict

# Directive names recognised by the checkers.  Entries are regex fragments
# (see 'deprecated(?!-removed)'), joined into one alternation below.
directives = [
    # standard docutils ones
    'admonition', 'attention', 'caution', 'class', 'compound', 'container',
    'contents', 'csv-table', 'danger', 'date', 'default-role', 'epigraph',
    'error', 'figure', 'footer', 'header', 'highlights', 'hint', 'image',
    'important', 'include', 'line-block', 'list-table', 'meta', 'note',
    'parsed-literal', 'pull-quote', 'raw', 'replace',
    'restructuredtext-test-directive', 'role', 'rubric', 'sectnum', 'sidebar',
    'table', 'target-notes', 'tip', 'title', 'topic', 'unicode', 'warning',
    # Sphinx and Python docs custom ones
    'acks', 'attribute', 'autoattribute', 'autoclass', 'autodata',
    'autoexception', 'autofunction', 'automethod', 'automodule',
    'availability', 'centered', 'cfunction', 'class', 'classmethod', 'cmacro',
    'cmdoption', 'cmember', 'code-block', 'confval', 'cssclass', 'ctype',
    'currentmodule', 'cvar', 'data', 'decorator', 'decoratormethod',
    'deprecated-removed', 'deprecated(?!-removed)', 'describe', 'directive',
    'doctest', 'envvar', 'event', 'exception', 'function', 'glossary',
    'highlight', 'highlightlang', 'impl-detail', 'index', 'literalinclude',
    'method', 'miscnews', 'module', 'moduleauthor', 'opcode', 'pdbcommand',
    'productionlist', 'program', 'role', 'sectionauthor', 'seealso',
    'sourcecode', 'staticmethod', 'tabularcolumns', 'testcode', 'testoutput',
    'testsetup', 'toctree', 'todo', 'todolist', 'versionadded',
    'versionchanged'
]

all_directives = '(' + '|'.join(directives) + ')'
# ".. name" followed by something other than "::" -- looks like a directive
# that was mistyped and therefore parsed as a comment.
seems_directive_re = re.compile(r'(?<!\.)\.\. %s([^a-z:]|:(?!:))' % all_directives)
# Text in single backticks, delimited by spaces or line boundaries.
default_role_re = re.compile(r'(^| )`\w([^`]*?\w)?`($| )')
# reST syntax that should not survive into built HTML.
leaked_markup_re = re.compile(r'[a-z]::\s|`|\.\.\s*\w+:')

# Patterns for type_of_explicit_markup(), tried in order; the first match wins.
_explicit_markup_kinds = (
    (re.compile(fr'\.\. {all_directives}::'), 'directive'),
    (re.compile(r'\.\. \[[0-9]+\] '), 'footnote'),
    (re.compile(r'\.\. \[[^\]]+\] '), 'citation'),
    (re.compile(r'\.\. _.*[^_]: '), 'target'),
    (re.compile(r'\.\. \|[^\|]*\| '), 'substitution_definition'),
)


# Maps a file suffix (e.g. '.rst') to the list of checker functions for it.
checkers = {}

# Default values of the attributes the checker() decorator sets on a function.
checker_props = {'severity': 1, 'falsepositives': False}


def checker(*suffixes, **kwds):
    """Decorator to register a function as a checker.

    *suffixes* are the file extensions the function applies to.  Keyword
    arguments named in ``checker_props`` override the defaults and are
    stored as attributes on the decorated function.
    """
    def deco(func):
        for suffix in suffixes:
            checkers.setdefault(suffix, []).append(func)
        for prop, default in checker_props.items():
            setattr(func, prop, kwds.get(prop, default))
        return func
    return deco


@checker('.py', severity=4)
def check_syntax(fn, lines):
    """Check Python examples for valid syntax.

    Yields ``(lineno, message)`` pairs; line 0 is used when no line number
    is available.
    """
    code = ''.join(lines)
    if '\r' in code:
        if os.name != 'nt':
            yield 0, '\\r in code file'
        code = code.replace('\r', '')
    try:
        compile(code, fn, 'exec')
    except SyntaxError as err:
        # err.lineno can be None; main() formats the line number with %d.
        yield err.lineno or 0, 'not compilable: %s' % err


@checker('.rst', severity=2)
def check_suspicious_constructs(fn, lines):
    """Check for suspicious reST constructs.

    Flags comments that look like mistyped directives and uses of the
    default role (single backticks) outside of productionlist blocks.
    """
    inprod = False
    for lno, line in enumerate(lines, 1):
        if seems_directive_re.search(line):
            yield lno, 'comment seems to be intended as a directive'
        if '.. productionlist::' in line:
            inprod = True
        elif not inprod and default_role_re.search(line):
            yield lno, 'default role used'
        elif inprod and not line.strip():
            # a blank line ends the productionlist block
            inprod = False


@checker('.py', '.rst')
def check_whitespace(fn, lines):
    """Check for whitespace issues: carriage returns, tabs, trailing blanks."""
    for lno, line in enumerate(lines, 1):
        if '\r' in line:
            yield lno, '\\r in line'
        if '\t' in line:
            yield lno, 'OMG TABS!!!1'
        # Only drop a newline that is really there; the last line of a file
        # may lack one, and slicing blindly would eat its final character.
        content = line[:-1] if line.endswith('\n') else line
        if content.rstrip(' \t') != content:
            yield lno, 'trailing whitespace'


@checker('.rst', severity=0)
def check_line_length(fn, lines):
    """Check for line length; this checker is not run by default."""
    for lno, line in enumerate(lines, 1):
        # 81 == 80 characters plus the newline
        if len(line) <= 81:
            continue
        stripped = line.lstrip()
        # don't complain about tables, links and function signatures
        if stripped.startswith(('+', '|')):
            continue
        if 'http://' in line or 'https://' in line:
            continue
        if stripped.startswith(('.. function', '.. method', '.. cfunction')):
            continue
        yield lno, "line too long"


@checker('.html', severity=2, falsepositives=True)
def check_leaked_markup(fn, lines):
    """Check HTML files for leaked reST markup; this only works if
    the HTML files have been built.
    """
    for lno, line in enumerate(lines, 1):
        if leaked_markup_re.search(line):
            yield lno, 'possibly leaked markup: %r' % line


def hide_literal_blocks(lines):
    """Tool to remove literal blocks from given lines.

    It yields empty lines in place of blocks, so line numbers are
    still meaningful.
    """
    in_block = False
    for line in lines:
        if line.endswith("::\n"):
            in_block = True
        elif in_block:
            if line == "\n" or line.startswith(" "):
                line = "\n"
            else:
                in_block = False
        yield line


def type_of_explicit_markup(line):
    """Classify a line that starts an explicit markup block.

    Returns one of 'directive', 'footnote', 'citation', 'target',
    'substitution_definition', or 'comment' when nothing else matches.
    """
    for pattern, kind in _explicit_markup_kinds:
        if pattern.match(line):
            return kind
    return 'comment'


def hide_comments(lines):
    """Tool to remove comments from given lines.

    It yields empty lines in place of comments, so line numbers are
    still meaningful.
    """
    in_multiline_comment = False
    for line in lines:
        if line == "..\n":
            in_multiline_comment = True
        elif in_multiline_comment:
            if line == "\n" or line.startswith(" "):
                line = "\n"
            else:
                in_multiline_comment = False
        if line.startswith(".. ") and type_of_explicit_markup(line) == 'comment':
            line = "\n"
        yield line


@checker(".rst", severity=2)
def check_missing_surrogate_space_on_plural(fn, lines):
    r"""Check for missing 'backslash-space' between a code sample and a letter.

    Good: ``Point``\ s
    Bad: ``Point``s
    """
    in_code_sample = False
    check_next_one = False
    for lno, line in enumerate(hide_comments(hide_literal_blocks(lines)), 1):
        tokens = line.split("``")
        for token_no, token in enumerate(tokens):
            if check_next_one:
                # The token is empty when the closing "``" ends the input or
                # is directly followed by another "``"; nothing to inspect.
                if token and token[0] in ascii_letters:
                    yield lno, f"Missing backslash-space between code sample and {token!r}."
                check_next_one = False
            if token_no == len(tokens) - 1:
                # The last piece of a line is not followed by "``", so the
                # inside/outside state carries over to the next line as is.
                continue
            if in_code_sample:
                check_next_one = True
            in_code_sample = not in_code_sample


def main(argv):
    """Run the registered checkers over a directory tree.

    *argv* is the full argument vector including the program name.
    Returns 2 on usage errors or a nonexistent path, 1 if any problem was
    reported, and 0 otherwise.
    """
    usage = '''\
Usage: %s [-v] [-f] [-s sev] [-i path]* [path]

Options:  -v       verbose (print all checked file names)
          -f       enable checkers that yield many false positives
          -s sev   only show problems with severity >= sev
          -i path  ignore subdir or file path
''' % argv[0]
    try:
        gopts, args = getopt.getopt(argv[1:], 'vfs:i:')
    except getopt.GetoptError:
        print(usage)
        return 2

    verbose = False
    severity = 1
    ignore = []
    falsepos = False
    for opt, val in gopts:
        if opt == '-v':
            verbose = True
        elif opt == '-f':
            falsepos = True
        elif opt == '-s':
            try:
                severity = int(val)
            except ValueError:
                # a non-numeric severity is a usage error, not a crash
                print(usage)
                return 2
        elif opt == '-i':
            ignore.append(abspath(val))

    if len(args) == 0:
        path = '.'
    elif len(args) == 1:
        path = args[0]
    else:
        print(usage)
        return 2

    if not exists(path):
        print('Error: path %s does not exist' % path)
        return 2

    # number of problems found, keyed by severity
    count = defaultdict(int)

    for root, dirs, files in os.walk(path):
        # ignore subdirs in ignore list
        if abspath(root) in ignore:
            del dirs[:]
            continue

        for fn in files:
            fn = join(root, fn)
            if fn[:2] == './':
                fn = fn[2:]

            # ignore files in ignore list
            if abspath(fn) in ignore:
                continue

            ext = splitext(fn)[1]
            checkerlist = checkers.get(ext, None)
            if not checkerlist:
                continue

            if verbose:
                print('Checking %s...' % fn)

            try:
                with open(fn, 'r', encoding='utf-8') as f:
                    lines = list(f)
            except (OSError, UnicodeDecodeError) as err:
                # An unreadable or non-UTF-8 file is reported as a
                # severity-4 problem instead of aborting the whole run.
                print('%s: cannot open: %s' % (fn, err))
                count[4] += 1
                continue

            for check in checkerlist:
                if check.falsepositives and not falsepos:
                    continue
                csev = check.severity
                if csev >= severity:
                    for lno, msg in check(fn, lines):
                        print('[%d] %s:%d: %s' % (csev, fn, lno, msg))
                        count[csev] += 1
    if verbose:
        print()
    if not count:
        if severity > 1:
            print('No problems with severity >= %d found.' % severity)
        else:
            print('No problems found.')
    else:
        for sev in sorted(count):
            number = count[sev]
            print('%d problem%s with severity %d found.' %
                  (number, 's' if number > 1 else '', sev))
    return int(bool(count))


if __name__ == '__main__':
    sys.exit(main(sys.argv))