• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3
4# Check for stylistic and formal issues in .rst and .py
5# files included in the documentation.
6#
7# 01/2009, Georg Brandl
8
9# TODO: - wrong versions in versionadded/changed
10#       - wrong markup after versionchanged directive
11
12import os
13import re
14import sys
15import getopt
16from string import ascii_letters
17from os.path import join, splitext, abspath, exists
18from collections import defaultdict
19
# Names of the reST directives this tool knows about.  The entries are
# joined with '|' into a regex alternation below, so each entry is a regex
# fragment rather than a plain string (see 'deprecated(?!-removed)').
# 'class' and 'role' are listed in both groups; the duplicates are harmless
# inside an alternation.
directives = [
    # standard docutils ones
    'admonition', 'attention', 'caution', 'class', 'compound', 'container',
    'contents', 'csv-table', 'danger', 'date', 'default-role', 'epigraph',
    'error', 'figure', 'footer', 'header', 'highlights', 'hint', 'image',
    'important', 'include', 'line-block', 'list-table', 'meta', 'note',
    'parsed-literal', 'pull-quote', 'raw', 'replace',
    'restructuredtext-test-directive', 'role', 'rubric', 'sectnum', 'sidebar',
    'table', 'target-notes', 'tip', 'title', 'topic', 'unicode', 'warning',
    # Sphinx and Python docs custom ones
    'acks', 'attribute', 'autoattribute', 'autoclass', 'autodata',
    'autoexception', 'autofunction', 'automethod', 'automodule',
    'availability', 'centered', 'cfunction', 'class', 'classmethod', 'cmacro',
    'cmdoption', 'cmember', 'code-block', 'confval', 'cssclass', 'ctype',
    'currentmodule', 'cvar', 'data', 'decorator', 'decoratormethod',
    'deprecated-removed', 'deprecated(?!-removed)', 'describe', 'directive',
    'doctest', 'envvar', 'event', 'exception', 'function', 'glossary',
    'highlight', 'highlightlang', 'impl-detail', 'index', 'literalinclude',
    'method', 'miscnews', 'module', 'moduleauthor', 'opcode', 'pdbcommand',
    'productionlist', 'program', 'role', 'sectionauthor', 'seealso',
    'sourcecode', 'staticmethod', 'tabularcolumns', 'testcode', 'testoutput',
    'testsetup', 'toctree', 'todo', 'todolist', 'versionadded',
    'versionchanged'
]

# One capturing group that matches any of the directive names above.
all_directives = '(' + '|'.join(directives) + ')'
# Matches ".. <directive>" (not preceded by another dot) when the name is
# followed by something other than a lowercase letter or colon, or by a
# single colon that is not part of "::" -- i.e. a known directive name
# written without the "::" that would make it a directive.
seems_directive_re = re.compile(r'(?<!\.)\.\. %s([^a-z:]|:(?!:))' % all_directives)
# Matches text in single backticks that starts and ends with a word
# character and is delimited by a space or the start/end of the string.
default_role_re = re.compile(r'(^| )`\w([^`]*?\w)?`($| )')
# Matches any of: a lowercase letter followed by "::" and whitespace, a
# backtick anywhere, or ".." followed by optional whitespace, word
# characters and a colon.
leaked_markup_re = re.compile(r'[a-z]::\s|`|\.\.\s*\w+:')
49
50
# Registry filled by the @checker decorator: maps a file suffix to the list
# of checker functions registered for that suffix.
checkers = {}

# Attributes set on every registered checker function, with the default
# value used when the decorator call does not supply the keyword.
checker_props = {'severity': 1, 'falsepositives': False}
54
55
def checker(*suffixes, **kwds):
    """Return a decorator that registers a function as a checker.

    The decorated function is appended to the ``checkers`` list of every
    suffix in *suffixes*.  For each property named in ``checker_props`` an
    attribute of that name is set on the function, taken from *kwds* when
    present and from the ``checker_props`` default otherwise.  The function
    itself is returned unchanged.
    """
    def deco(func):
        # Attach the per-checker properties first, then register.
        for prop, default in checker_props.items():
            setattr(func, prop, kwds.get(prop, default))
        for suffix in suffixes:
            checkers.setdefault(suffix, []).append(func)
        return func
    return deco
65
66
@checker('.py', severity=4)
def check_syntax(fn, lines):
    """Yield ``(lineno, message)`` pairs if the Python source does not compile.

    *lines* are joined into a single source string.  Carriage returns are
    reported once (with line number 0) on non-Windows systems and removed
    before the source is compiled under the file name *fn*.
    """
    source = ''.join(lines)
    has_carriage_return = '\r' in source
    if has_carriage_return and os.name != 'nt':
        yield 0, '\\r in code file'
    if has_carriage_return:
        source = source.replace('\r', '')
    try:
        compile(source, fn, 'exec')
    except SyntaxError as err:
        yield err.lineno, 'not compilable: %s' % err
79
80
@checker('.rst', severity=2)
def check_suspicious_constructs(fn, lines):
    """Yield ``(lineno, message)`` pairs for suspicious reST constructs.

    Two things are reported: comments that look like directives (per
    ``seems_directive_re``) and uses of the default role (per
    ``default_role_re``).  The default-role check is suspended from a
    ``.. productionlist::`` line until the next blank line.
    """
    in_production_list = False
    for lineno, text in enumerate(lines, start=1):
        if seems_directive_re.search(text):
            yield lineno, 'comment seems to be intended as a directive'

        if '.. productionlist::' in text:
            in_production_list = True
            continue

        if in_production_list:
            # A blank line ends the production list.
            if not text.strip():
                in_production_list = False
        elif default_role_re.search(text):
            yield lineno, 'default role used'
94
95
@checker('.py', '.rst')
def check_whitespace(fn, lines):
    """Yield ``(lineno, message)`` pairs for whitespace problems.

    Reports carriage returns, tab characters and trailing spaces/tabs.
    *fn* is unused; line numbers are 1-based.
    """
    for lno, line in enumerate(lines, start=1):
        if '\r' in line:
            yield lno, '\\r in line'
        if '\t' in line:
            yield lno, 'OMG TABS!!!1'
        # Drop the line terminator only if there is one.  Slicing off the
        # last character unconditionally would eat a real character from a
        # final line that lacks a newline, giving a false positive for
        # "a b" and missing the trailing blank in "a ".
        content = line[:-1] if line.endswith('\n') else line
        if content.rstrip(' \t') != content:
            yield lno, 'trailing whitespace'
106
107
@checker('.rst', severity=0)
def check_line_length(fn, lines):
    """Check for line length; this checker is not run by default.

    Yields ``(lineno, "line too long")`` for every line longer than 81
    characters (the length includes the line terminator), except for
    tables, lines containing a link and function signatures.  *fn* is
    unused; line numbers are 1-based.
    """
    # Table rows start with '+' or '|'; the rest are signature directives.
    exempt_prefixes = ('+', '|', '.. function', '.. method', '.. cfunction')
    for lno, line in enumerate(lines, start=1):
        if len(line) <= 81:
            continue
        if 'http://' in line or 'https://' in line:
            continue
        # startswith() copes with a whitespace-only line, where indexing
        # the first character of the stripped text would raise IndexError.
        if line.lstrip().startswith(exempt_prefixes):
            continue
        yield lno, "line too long"
120
121
@checker('.html', severity=2, falsepositives=True)
def check_leaked_markup(fn, lines):
    """Flag HTML lines that still seem to contain reST markup.

    This only works if the HTML files have been built.  Yields
    ``(lineno, message)`` for every line matched by ``leaked_markup_re``;
    line numbers are 1-based.
    """
    for lineno, text in enumerate(lines, start=1):
        if leaked_markup_re.search(text) is None:
            continue
        yield lineno, 'possibly leaked markup: %r' % text
130
131
def hide_literal_blocks(lines):
    """Blank out the bodies of literal blocks in *lines*.

    A line ending in ``::`` opens a block; the blank and space-indented
    lines that follow are replaced by empty lines, so line numbers stay
    meaningful.  The first line that is neither blank nor indented closes
    the block and is passed through unchanged.
    """
    hiding = False
    for text in lines:
        if text.endswith("::\n"):
            # The introducing line itself is always kept.
            hiding = True
            yield text
            continue
        if hiding:
            if text == "\n" or text.startswith(" "):
                yield "\n"
                continue
            hiding = False
        yield text
148
149
def type_of_explicit_markup(line):
    """Classify a line of reST explicit markup.

    The patterns are tried in order against the start of *line* and the
    name of the first match is returned: 'directive', 'footnote',
    'citation', 'target' or 'substitution_definition'.  If none matches,
    the line is classified as 'comment'.
    """
    # Order matters: the footnote pattern must come before the more
    # general citation pattern.
    patterns = (
        (fr'\.\. {all_directives}::', 'directive'),
        (r'\.\. \[[0-9]+\] ', 'footnote'),
        (r'\.\. \[[^\]]+\] ', 'citation'),
        (r'\.\. _.*[^_]: ', 'target'),
        (r'\.\. \|[^\|]*\| ', 'substitution_definition'),
    )
    for pattern, kind in patterns:
        if re.match(pattern, line):
            return kind
    return 'comment'
162
163
def hide_comments(lines):
    """Blank out reST comments in *lines*.

    Empty lines are yielded in place of comment text, so line numbers stay
    meaningful.  A line consisting only of ``..`` starts a multi-line
    comment whose blank and space-indented continuation lines are hidden.
    A line starting with ``.. `` is hidden when
    ``type_of_explicit_markup`` classifies it as a comment.
    """
    inside_comment = False
    for text in lines:
        if text == "..\n":
            inside_comment = True
        elif inside_comment:
            continues_comment = text == "\n" or text.startswith(" ")
            if continues_comment:
                text = "\n"
            inside_comment = continues_comment
        # Short-circuit: only lines starting with ".. " are classified.
        is_one_line_comment = (
            text.startswith(".. ")
            and type_of_explicit_markup(text) == 'comment'
        )
        yield "\n" if is_one_line_comment else text
182
183
184
@checker(".rst", severity=2)
def check_missing_surrogate_space_on_plural(fn, lines):
    r"""Check for missing 'backslash-space' between a code sample and a letter.

    Good: ``Point``\ s
    Bad: ``Point``s

    Literal blocks and comments are blanked out before checking.  Yields
    ``(lineno, message)`` pairs with 1-based line numbers; *fn* is unused.
    """
    in_code_sample = False
    check_next_one = False
    for lno, line in enumerate(hide_comments(hide_literal_blocks(lines)),
                               start=1):
        # Splitting on the double backtick makes tokens alternate between
        # text outside and inside code samples; both flags carry over from
        # one line to the next.
        tokens = line.split("``")
        last_token_no = len(tokens) - 1
        for token_no, token in enumerate(tokens):
            if check_next_one:
                # The token after a code sample is empty when two samples
                # are adjacent or the input ends right after a sample, so
                # guard before indexing.  (Slicing is no substitute: the
                # empty string is "in" every string.)
                if token and token[0] in ascii_letters:
                    yield lno, f"Missing backslash-space between code sample and {token!r}."
                check_next_one = False
            if token_no == last_token_no:
                # No delimiter follows the last token on the line.
                continue
            if in_code_sample:
                check_next_one = True
            in_code_sample = not in_code_sample
206
def main(argv):
    """Run the registered checkers over a directory tree and print a report.

    *argv* is the full argument vector with the program name first.  The
    options are described in the usage text below; the optional positional
    argument is the path to walk and defaults to the current directory.

    Returns 2 for a usage error (unknown option, non-integer ``-s`` value,
    more than one path) or a nonexistent path, 1 if any problem was
    counted, and 0 otherwise.
    """
    usage = '''\
Usage: %s [-v] [-f] [-s sev] [-i path]* [path]

Options:  -v       verbose (print all checked file names)
          -f       enable checkers that yield many false positives
          -s sev   only show problems with severity >= sev
          -i path  ignore subdir or file path
''' % argv[0]
    try:
        gopts, args = getopt.getopt(argv[1:], 'vfs:i:')
    except getopt.GetoptError:
        print(usage)
        return 2

    verbose = False
    severity = 1
    ignore = set()
    falsepos = False
    for opt, val in gopts:
        if opt == '-v':
            verbose = True
        elif opt == '-f':
            falsepos = True
        elif opt == '-s':
            # A non-numeric severity is a usage error, not a traceback.
            try:
                severity = int(val)
            except ValueError:
                print(usage)
                return 2
        elif opt == '-i':
            ignore.add(abspath(val))

    if len(args) > 1:
        print(usage)
        return 2
    path = args[0] if args else '.'

    if not exists(path):
        print('Error: path %s does not exist' % path)
        return 2

    # Number of problems found, keyed by severity.
    count = defaultdict(int)

    for root, dirs, files in os.walk(path):
        # ignore subdirs in ignore list
        if abspath(root) in ignore:
            # Emptying dirs in place stops os.walk from descending further.
            del dirs[:]
            continue

        for fn in files:
            fn = join(root, fn)
            if fn[:2] == './':
                fn = fn[2:]

            # ignore files in ignore list
            if abspath(fn) in ignore:
                continue

            ext = splitext(fn)[1]
            checkerlist = checkers.get(ext, None)
            if not checkerlist:
                continue

            if verbose:
                print('Checking %s...' % fn)

            try:
                with open(fn, 'r', encoding='utf-8') as f:
                    lines = list(f)
            except OSError as err:
                print('%s: cannot open: %s' % (fn, err))
                count[4] += 1
                continue
            except UnicodeDecodeError as err:
                # Report a file that is not valid UTF-8 as a problem
                # instead of aborting the whole run.
                print('%s: cannot decode: %s' % (fn, err))
                count[4] += 1
                continue

            for check in checkerlist:
                if check.falsepositives and not falsepos:
                    continue
                csev = check.severity
                if csev >= severity:
                    for lno, msg in check(fn, lines):
                        print('[%d] %s:%d: %s' % (csev, fn, lno, msg))
                        count[csev] += 1
    if verbose:
        print()
    if not count:
        if severity > 1:
            print('No problems with severity >= %d found.' % severity)
        else:
            print('No problems found.')
    else:
        for sev in sorted(count):
            number = count[sev]
            print('%d problem%s with severity %d found.' %
                  (number, 's' if number > 1 else '', sev))
    return int(bool(count))
302
303
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main(sys.argv))
306