• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2"""
3Check the output of running Sphinx in nit-picky mode (missing references).
4"""
5
6from __future__ import annotations
7
8import argparse
9import itertools
10import os
11import re
12import subprocess
13import sys
14from pathlib import Path
15from typing import TextIO
16
# Default threshold for --fail-if-new-news-nit: a warning in the built NEWS
# file at or before this line number is reported as a new nit.
NEWS_NIT_THRESHOLD = 1400

# Exclude these whether they're dirty or clean,
# because they trigger a rebuild of dirty files.
EXCLUDE_FILES = {
    "Doc/whatsnew/changelog.rst",
}

# Subdirectories of Doc/ whose .rst files are skipped by fail_if_regression().
EXCLUDE_SUBDIRS = {
    ".env",
    ".venv",
    "env",
    "includes",
    "venv",
}

# Regex pattern to match the parts of a Sphinx warning.
# Named groups: "file" (may start with a drive prefix such as "C:\" or "C:/"),
# "line" (digits) and "msg" (everything after "WARNING: ").
WARNING_PATTERN = re.compile(
    r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
)

# Regex pattern to match the line numbers in a Git unified diff hunk header
# ("@@ -a,n +b,m @@"). Named groups: "linea"/"removed" for the old side and
# "lineb"/"added" for the new side; both counts are optional in the header.
DIFF_PATTERN = re.compile(
    r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@",
    flags=re.MULTILINE,
)
45
46
def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
    """Return the paths reported by ``git diff --name-only`` for two refs.

    The comparison uses Git's three-dot form ``ref_a...ref_b`` and
    *filter_mode* is passed through unchanged as the value of
    ``--diff-filter`` (the callers in this file use ``"A"`` and ``"M"``).
    Blank entries in Git's output are dropped.

    Raises ``subprocess.CalledProcessError`` if Git exits with an error.
    """
    command = [
        "git",
        "diff",
        f"--diff-filter={filter_mode}",
        "--name-only",
        f"{ref_a}...{ref_b}",
        "--",
    ]
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        check=True,
        text=True,
        encoding="UTF-8",
    )

    changed = set()
    for raw_name in completed.stdout.strip().split("\n"):
        name = raw_name.strip()
        if name:
            changed.add(Path(name))
    return changed
66
67
def get_diff_lines(ref_a: str, ref_b: str, file: Path) -> list[int]:
    """Return the new-side line numbers of *file* changed between two refs.

    Runs ``git diff --unified=0`` on ``ref_a...ref_b`` restricted to *file*
    and expands every hunk header's ``+start,count`` part into the
    individual line numbers ``start .. start + count - 1``.

    Raises ``subprocess.CalledProcessError`` if Git exits with an error.
    """
    completed = subprocess.run(
        [
            "git",
            "diff",
            "--unified=0",
            f"{ref_a}...{ref_b}",
            "--",
            str(file),
        ],
        stdout=subprocess.PIPE,
        check=True,
        text=True,
        encoding="UTF-8",
    )

    changed_lines = []
    for hunk in DIFF_PATTERN.finditer(completed.stdout):
        # Removed and added line counts are 1 if not printed
        fields = hunk.groupdict(default=1)
        first = int(fields["lineb"])
        count = int(fields["added"])
        changed_lines.extend(range(first, first + count))

    return changed_lines
101
102
def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
    """Group the 1-based line numbers of *file_obj* by paragraph.

    A new group starts at the very first line and at every non-blank line
    that directly follows a blank one. Each group begins with the number of
    the line just before its first line (``0`` for the first group), so the
    line preceding a paragraph is counted as part of it. Blank lines are
    appended to the group that is currently open.
    """
    groups = []
    prev_was_blank = None  # None until the first line has been read
    for number, text in enumerate(file_obj, start=1):
        is_blank = not text.strip()
        if prev_was_blank is None or (prev_was_blank and not is_blank):
            # Seed the group with the line before the paragraph starts
            groups.append([number - 1])
        groups[-1].append(number)
        prev_was_blank = is_blank
    return groups
115
116
def filter_and_parse_warnings(
    warnings: list[str], files: set[Path]
) -> list[re.Match[str]]:
    """Parse the warnings that mention any of *files*.

    A warning is kept when the string form of at least one path in *files*
    occurs anywhere in it. Kept warnings are stripped and matched in full
    against ``WARNING_PATTERN``; those that do not match are discarded.
    The input order is preserved.
    """
    file_names = [str(file) for file in files]

    parsed = []
    for warning in warnings:
        if not any(name in warning for name in file_names):
            continue
        match = WARNING_PATTERN.fullmatch(warning.strip())
        if match:
            parsed.append(match)
    return parsed
132
133
def filter_warnings_by_diff(
    warnings: list[re.Match[str]], ref_a: str, ref_b: str, file: Path
) -> list[re.Match[str]]:
    """Filter the passed per-file warnings to just those on changed lines.

    A warning is kept when its ``file`` group contains ``str(file)`` and its
    ``line`` group falls inside a paragraph of *file* that has at least one
    line changed between *ref_a* and *ref_b*. Paragraph grouping comes from
    ``get_para_line_numbers()`` and changed lines from ``get_diff_lines()``.
    The order of *warnings* is preserved.
    """
    # Build the lookup set once rather than once per paragraph.
    changed_lines = set(get_diff_lines(ref_a, ref_b, file))
    with file.open(encoding="UTF-8") as file_obj:
        paragraphs = get_para_line_numbers(file_obj)

    # Every line of every paragraph that overlaps the diff
    touched_para_lines = set()
    for para_lines in paragraphs:
        if not changed_lines.isdisjoint(para_lines):
            touched_para_lines.update(para_lines)

    file_name = str(file)
    return [
        warning
        for warning in warnings
        if file_name in warning["file"]
        and int(warning["line"]) in touched_para_lines
    ]
156
157
def process_touched_warnings(
    warnings: list[str], ref_a: str, ref_b: str
) -> list[re.Match[str]]:
    """Reduce raw Sphinx warnings to those relevant to the diff.

    Warnings in files obtained with diff filter ``"A"`` are all kept.
    Warnings in files obtained with diff filter ``"M"`` are kept only when
    ``filter_warnings_by_diff()`` reports them as touched. The result lists
    the warnings for the ``"A"`` files first, followed by those for the
    ``"M"`` files.
    """
    added_files = get_diff_files(ref_a, ref_b, filter_mode="A")
    modified_files = get_diff_files(ref_a, ref_b, filter_mode="M")

    touched = list(filter_and_parse_warnings(warnings, added_files))
    modified_matches = filter_and_parse_warnings(warnings, modified_files)

    # Only diff the modified files that actually produced a warning
    warned_files = set()
    for file in modified_files:
        if any(str(file) in match["file"] for match in modified_matches):
            warned_files.add(file)

    for file in warned_files:
        touched.extend(
            filter_warnings_by_diff(modified_matches, ref_a, ref_b, file)
        )

    return touched
184
185
def annotate_diff(
    warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
) -> None:
    """
    Print GitHub Actions warning commands for warnings in changed paragraphs.

    Each selected warning, for example:
        .../Doc/library/cgi.rst:98: WARNING: reference target not found
    is printed as a workflow command:
        ::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
    followed by the original warning text. If nothing is selected, "None"
    is printed instead.

    See:
    https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
    """
    touched = process_touched_warnings(warnings, ref_a, ref_b)
    print("Emitting doc warnings matching modified lines:")
    if not touched:
        print("None")
        return
    for match in touched:
        print("::warning file={file},line={line}::{msg}".format_map(match))
        # Group 0 is the complete matched warning line
        print(match.group(0))
207
208
def fail_if_regression(
    warnings: list[str],
    files_with_expected_nits: set[str],
    files_with_nits: set[str],
) -> int:
    """
    Check that files expected to be clean produced no Sphinx warnings.

    The files expected to be clean are all ``*.rst`` files under ``Doc/``
    (outside ``EXCLUDE_SUBDIRS``) minus *files_with_expected_nits* and
    ``EXCLUDE_FILES``. If any of them appears in *files_with_nits*, the file
    names and their parsed warnings are printed and -1 is returned;
    otherwise 0 is returned.
    """
    all_rst = set()
    for rst in Path("Doc/").rglob("*.rst"):
        # parts[0] is "Doc"; parts[1] is the first component below it
        if rst.parts[1] in EXCLUDE_SUBDIRS:
            continue
        all_rst.add(str(rst))

    should_be_clean = all_rst - files_with_expected_nits - EXCLUDE_FILES
    problem_files = sorted(should_be_clean & files_with_nits)
    if not problem_files:
        return 0

    print("\nError: must not contain warnings:\n")
    for filename in problem_files:
        print(filename)
        for warning in warnings:
            if filename not in warning:
                continue
            match = WARNING_PATTERN.fullmatch(warning)
            if match:
                print("  {line}: {msg}".format_map(match))
    return -1
235
236
def fail_if_improved(
    files_with_expected_nits: set[str], files_with_nits: set[str]
) -> int:
    """
    Detect files that were expected to have nits but no longer have any.

    Such files are printed in sorted order, together with a request to
    remove them from Doc/tools/.nitignore, and -1 is returned. Returns 0
    when every expected file still has nits.
    """
    now_clean = sorted(files_with_expected_nits - files_with_nits)
    if not now_clean:
        return 0

    print("\nCongratulations! You improved:\n")
    for filename in now_clean:
        print(filename)
    print("\nPlease remove from Doc/tools/.nitignore\n")
    return -1
252
253
def fail_if_new_news_nit(warnings: list[str], threshold: int) -> int:
    """
    Ensure no warnings are found in the NEWS file at or before a given line.

    Only warnings containing ``/build/NEWS:`` are considered. The line number
    is read from the text immediately following that marker, so colons
    elsewhere in the path (for example a drive prefix such as ``C:``) do not
    break parsing. Warnings with no numeric line number after the marker
    cannot be positioned relative to *threshold* and are ignored.

    Prints the offending warnings and returns -1 if any of them is on a line
    number less than or equal to *threshold*; otherwise returns 0.
    """
    marker = "/build/NEWS:"

    # Nits found at or before the threshold line
    new_news_nits = []
    for warning in warnings:
        _, found, rest = warning.partition(marker)
        if not found:
            continue
        lineno_text = rest.partition(":")[0]
        if lineno_text.isdecimal() and int(lineno_text) <= threshold:
            new_news_nits.append(warning)

    if new_news_nits:
        print("\nError: new NEWS nits:\n")
        for warning in new_news_nits:
            print(warning)
        return -1

    return 0
272
273
def main(argv: list[str] | None = None) -> int:
    """Parse the command line, load the warning data and run the checks.

    Reads ``Doc/sphinx-warnings.txt`` and ``Doc/tools/.nitignore`` relative
    to the current directory, then runs whichever of the annotate/fail
    checks were requested. Returns the sum of the checks' results (0 when
    none failed).

    Raises ``RuntimeError`` if there is no ``Doc`` directory in the current
    working directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--annotate-diff",
        nargs="*",
        metavar=("BASE_REF", "HEAD_REF"),
        help="Add GitHub Actions annotations on the diff for warnings on "
        "lines changed between the given refs (main and HEAD, by default)",
    )
    parser.add_argument(
        "--fail-if-regression",
        action="store_true",
        help="Fail if known-good files have warnings",
    )
    parser.add_argument(
        "--fail-if-improved",
        action="store_true",
        help="Fail if new files with no nits are found",
    )
    parser.add_argument(
        "--fail-if-new-news-nit",
        metavar="threshold",
        type=int,
        nargs="?",
        const=NEWS_NIT_THRESHOLD,
        help="Fail if new NEWS nit found before threshold line number",
    )

    args = parser.parse_args(argv)
    refs = args.annotate_diff
    if refs is not None and len(refs) > 2:
        parser.error(
            "--annotate-diff takes between 0 and 2 ref args, not "
            f"{len(refs)} {tuple(refs)}"
        )

    doc_dir = Path("Doc")
    if not (doc_dir.exists() and doc_dir.is_dir()):
        raise RuntimeError("Must run this script from the repo root")

    with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
        warnings = f.read().splitlines()

    # Reduce each warning mentioning "Doc/" to its path relative to the
    # current directory (the text before the first colon).
    cwd_prefix = str(Path.cwd()) + os.path.sep
    files_with_nits = set()
    for warning in warnings:
        if "Doc/" in warning:
            files_with_nits.add(
                warning.removeprefix(cwd_prefix).partition(":")[0]
            )

    # Non-blank lines of .nitignore that do not start with "#"
    files_with_expected_nits = set()
    with Path("Doc/tools/.nitignore").open(encoding="UTF-8") as nitignore:
        for entry in nitignore:
            if entry.startswith("#"):
                continue
            name = entry.strip()
            if name:
                files_with_expected_nits.add(name)

    exit_code = 0

    if refs is not None:
        annotate_diff(warnings, *refs)

    if args.fail_if_regression:
        exit_code += fail_if_regression(
            warnings, files_with_expected_nits, files_with_nits
        )

    if args.fail_if_improved:
        exit_code += fail_if_improved(
            files_with_expected_nits, files_with_nits
        )

    if args.fail_if_new_news_nit:
        exit_code += fail_if_new_news_nit(warnings, args.fail_if_new_news_nit)

    return exit_code
348
349
# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
352