#!/usr/bin/env python3
"""
Check the output of running Sphinx in nit-picky mode (missing references).

The script reads ``Doc/sphinx-warnings.txt`` and ``Doc/tools/.nitignore``
relative to the current directory, so it must be run from the repo root.
"""

from __future__ import annotations

import argparse
import itertools
import os
import re
import subprocess
import sys
from pathlib import Path
from typing import TextIO

# Fail if NEWS nit found at or before this line number
NEWS_NIT_THRESHOLD = 1400

# Exclude these whether they're dirty or clean,
# because they trigger a rebuild of dirty files.
EXCLUDE_FILES = {
    "Doc/whatsnew/changelog.rst",
}

# Subdirectories of Doc/ to exclude.
EXCLUDE_SUBDIRS = {
    ".env",
    ".venv",
    "env",
    "includes",
    "venv",
}

# Regex pattern to match the parts of a Sphinx warning
# (optional Windows drive prefix, file path, line number, message).
WARNING_PATTERN = re.compile(
    r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
)

# Regex pattern to match the line numbers in a Git unified diff
DIFF_PATTERN = re.compile(
    r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@",
    flags=re.MULTILINE,
)

# Marker identifying a warning that belongs to the built NEWS file.
_NEWS_MARKER = "/build/NEWS:"


def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
    """
    List the files changed between two Git refs, filtered by change type.

    Runs ``git diff --name-only`` on ``ref_a...ref_b``; *filter_mode* is
    passed verbatim to ``--diff-filter`` (e.g. ``"A"`` or ``"M"``).
    Raises ``subprocess.CalledProcessError`` if Git exits non-zero.
    """
    result = subprocess.run(
        [
            "git",
            "diff",
            f"--diff-filter={filter_mode}",
            "--name-only",
            f"{ref_a}...{ref_b}",
            "--",
        ],
        stdout=subprocess.PIPE,
        check=True,
        text=True,
        encoding="UTF-8",
    )

    names = (name.strip() for name in result.stdout.strip().split("\n"))
    # Empty output still yields one empty string after split(); drop blanks.
    return {Path(name) for name in names if name}


def get_diff_lines(ref_a: str, ref_b: str, file: Path) -> list[int]:
    """
    List the lines changed between two Git refs for a specific file.

    The returned numbers refer to the *new* side of each hunk
    (``+start,count`` in the hunk header).
    Raises ``subprocess.CalledProcessError`` if Git exits non-zero.
    """
    diff_output = subprocess.run(
        [
            "git",
            "diff",
            "--unified=0",
            f"{ref_a}...{ref_b}",
            "--",
            str(file),
        ],
        stdout=subprocess.PIPE,
        check=True,
        text=True,
        encoding="UTF-8",
    )

    # Scrape line offsets + lengths from diff and convert to line numbers
    line_numbers: list[int] = []
    for hunk in DIFF_PATTERN.finditer(diff_output.stdout):
        # Removed and added line counts are 1 if not printed
        values = hunk.groupdict(default=1)
        start = int(values["lineb"])
        added = int(values["added"])
        line_numbers.extend(range(start, start + added))

    return line_numbers


def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
    """
    Get the line numbers of text in a file object, grouped by paragraph.

    A new paragraph starts at the first line and at every non-blank line
    that follows a blank one.  Each group also begins with the number of
    the line just before the paragraph (``0`` for the first group), and
    trailing blank lines stay attached to the paragraph they follow.
    """
    paragraphs: list[list[int]] = []
    prev_line = None
    for lineno, line in enumerate(file_obj, start=1):
        starts_paragraph = prev_line is None or (
            line.strip() and not prev_line.strip()
        )
        if starts_paragraph:
            # Seed the group with the preceding line number.
            paragraphs.append([lineno - 1])
        paragraphs[-1].append(lineno)
        prev_line = line
    return paragraphs


def filter_and_parse_warnings(
    warnings: list[str], files: set[Path]
) -> list[re.Match[str]]:
    """
    Get the warnings matching passed files and parse them with regex.

    A warning is kept when the string form of any path in *files* occurs
    in it; kept warnings that do not fully match ``WARNING_PATTERN`` are
    dropped.
    """
    file_names = [str(file) for file in files]
    parsed = []
    for warning in warnings:
        if not any(name in warning for name in file_names):
            continue
        match = WARNING_PATTERN.fullmatch(warning.strip())
        if match:
            parsed.append(match)
    return parsed


def filter_warnings_by_diff(
    warnings: list[re.Match[str]], ref_a: str, ref_b: str, file: Path
) -> list[re.Match[str]]:
    """
    Filter the passed per-file warnings to just those on changed lines.

    A warning counts as "on a changed line" when its line number belongs
    to a paragraph (see ``get_para_line_numbers``) that contains at least
    one line changed between *ref_a* and *ref_b*.
    """
    # Build the set once instead of once per paragraph.
    diff_lines = set(get_diff_lines(ref_a, ref_b, file))
    with file.open(encoding="UTF-8") as file_obj:
        paragraphs = get_para_line_numbers(file_obj)

    touched_paras = [
        para_lines
        for para_lines in paragraphs
        if not diff_lines.isdisjoint(para_lines)
    ]
    touched_para_lines = set(itertools.chain.from_iterable(touched_paras))

    file_name = str(file)
    return [
        warning
        for warning in warnings
        if file_name in warning["file"]
        and int(warning["line"]) in touched_para_lines
    ]


def process_touched_warnings(
    warnings: list[str], ref_a: str, ref_b: str
) -> list[re.Match[str]]:
    """
    Filter a list of Sphinx warnings to those affecting touched lines.

    Every warning in an added file is kept; for modified files only the
    warnings in paragraphs touched by the diff are kept.
    """
    added_files = get_diff_files(ref_a, ref_b, filter_mode="A")
    modified_files = get_diff_files(ref_a, ref_b, filter_mode="M")

    warnings_added = filter_and_parse_warnings(warnings, added_files)
    warnings_modified = filter_and_parse_warnings(warnings, modified_files)

    modified_files_warned = {
        file
        for file in modified_files
        if any(str(file) in warning["file"] for warning in warnings_modified)
    }

    # Sort so the output order does not depend on set iteration order.
    warnings_modified_touched = [
        filter_warnings_by_diff(warnings_modified, ref_a, ref_b, file)
        for file in sorted(modified_files_warned)
    ]

    return warnings_added + list(
        itertools.chain.from_iterable(warnings_modified_touched)
    )


def annotate_diff(
    warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
) -> None:
    """
    Convert Sphinx warning messages to GitHub Actions for changed paragraphs.

    Converts lines like:
        .../Doc/library/cgi.rst:98: WARNING: reference target not found
    to:
        ::warning file=.../Doc/library/cgi.rst,line=98::reference target not found

    See:
    https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
    """
    warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)
    print("Emitting doc warnings matching modified lines:")
    for warning in warnings_touched:
        # re.Match supports lookup by group name, so it works with format_map.
        print("::warning file={file},line={line}::{msg}".format_map(warning))
        # Also echo the raw warning text.
        print(warning[0])
    if not warnings_touched:
        print("None")


def fail_if_regression(
    warnings: list[str],
    files_with_expected_nits: set[str],
    files_with_nits: set[str],
) -> int:
    """
    Ensure some files always pass Sphinx nit-picky mode (no missing references).
    These are files which are *not* in .nitignore.

    Prints the offending files with their warnings and returns -1 if any
    such file has nits; returns 0 otherwise.
    """
    all_rst = {
        str(rst)
        for rst in Path("Doc/").rglob("*.rst")
        if rst.parts[1] not in EXCLUDE_SUBDIRS
    }
    should_be_clean = all_rst - files_with_expected_nits - EXCLUDE_FILES
    problem_files = sorted(should_be_clean & files_with_nits)
    if not problem_files:
        return 0

    print("\nError: must not contain warnings:\n")
    for filename in problem_files:
        print(filename)
        for warning in warnings:
            if filename not in warning:
                continue
            if match := WARNING_PATTERN.fullmatch(warning):
                print("    {line}: {msg}".format_map(match))
    return -1


def fail_if_improved(
    files_with_expected_nits: set[str], files_with_nits: set[str]
) -> int:
    """
    We may have fixed warnings in some files so that the files are now completely clean.
    Good news! Let's add them to .nitignore to prevent regression.

    Prints the now-clean files and returns -1 if any file listed as
    expected-to-have-nits has none; returns 0 otherwise.
    """
    files_with_no_nits = files_with_expected_nits - files_with_nits
    if not files_with_no_nits:
        return 0

    print("\nCongratulations! You improved:\n")
    for filename in sorted(files_with_no_nits):
        print(filename)
    print("\nPlease remove from Doc/tools/.nitignore\n")
    return -1


def _news_nit_lineno(warning: str) -> int | None:
    """Return the line number following the NEWS marker, or None if absent."""
    _, _, rest = warning.partition(_NEWS_MARKER)
    try:
        return int(rest.partition(":")[0])
    except ValueError:
        # e.g. a file-level warning that carries no line number
        return None


def fail_if_new_news_nit(warnings: list[str], threshold: int) -> int:
    """
    Ensure no warnings are found in the NEWS file at or before a given line number.

    Prints the offending warnings and returns -1 if any are found;
    returns 0 otherwise.  NEWS warnings whose line number cannot be
    parsed are skipped, since they cannot be compared with *threshold*.
    """
    # Nits found at or before the threshold line
    new_news_nits = []
    for warning in warnings:
        if _NEWS_MARKER not in warning:
            continue
        lineno = _news_nit_lineno(warning)
        if lineno is not None and lineno <= threshold:
            new_news_nits.append(warning)

    if new_news_nits:
        print("\nError: new NEWS nits:\n")
        for warning in new_news_nits:
            print(warning)
        return -1

    return 0


def main(argv: list[str] | None = None) -> int:
    """
    Parse the command line and run the requested checks.

    Returns 0 when every requested check passes; each failing check
    subtracts 1 from the returned exit code.  Raises ``RuntimeError``
    when there is no ``Doc`` directory in the current directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--annotate-diff",
        nargs="*",
        metavar=("BASE_REF", "HEAD_REF"),
        help="Add GitHub Actions annotations on the diff for warnings on "
        "lines changed between the given refs (main and HEAD, by default)",
    )
    parser.add_argument(
        "--fail-if-regression",
        action="store_true",
        help="Fail if known-good files have warnings",
    )
    parser.add_argument(
        "--fail-if-improved",
        action="store_true",
        help="Fail if new files with no nits are found",
    )
    parser.add_argument(
        "--fail-if-new-news-nit",
        metavar="threshold",
        type=int,
        nargs="?",
        const=NEWS_NIT_THRESHOLD,
        help="Fail if new NEWS nit found before threshold line number",
    )

    args = parser.parse_args(argv)
    if args.annotate_diff is not None and len(args.annotate_diff) > 2:
        parser.error(
            "--annotate-diff takes between 0 and 2 ref args, not "
            f"{len(args.annotate_diff)} {tuple(args.annotate_diff)}"
        )
    exit_code = 0

    wrong_directory_msg = "Must run this script from the repo root"
    # is_dir() is already False for a path that does not exist.
    if not Path("Doc").is_dir():
        raise RuntimeError(wrong_directory_msg)

    with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
        warnings = f.read().splitlines()

    # Strip the absolute prefix so names compare equal to repo-relative paths.
    cwd = str(Path.cwd()) + os.path.sep
    files_with_nits = {
        warning.removeprefix(cwd).split(":")[0]
        for warning in warnings
        if "Doc/" in warning
    }

    with Path("Doc/tools/.nitignore").open(encoding="UTF-8") as clean_files:
        files_with_expected_nits = {
            filename.strip()
            for filename in clean_files
            if filename.strip() and not filename.startswith("#")
        }

    if args.annotate_diff is not None:
        annotate_diff(warnings, *args.annotate_diff)

    if args.fail_if_regression:
        exit_code += fail_if_regression(
            warnings, files_with_expected_nits, files_with_nits
        )

    if args.fail_if_improved:
        exit_code += fail_if_improved(
            files_with_expected_nits, files_with_nits
        )

    # Compare against None: the threshold is an int, and 0 is falsy.
    if args.fail_if_new_news_nit is not None:
        exit_code += fail_if_new_news_nit(warnings, args.fail_if_new_news_nit)

    return exit_code


if __name__ == "__main__":
    sys.exit(main())