"""Check the stable ABI manifest or generate files from it

By default, the tool only checks existing files/libraries.
Pass --generate to recreate auto-generated files instead.

For actions that take a FILENAME, the filename can be left out to use a default
(relative to the manifest file, as they appear in the CPython codebase).
"""

from functools import partial
from pathlib import Path
import dataclasses
import subprocess
import sysconfig
import argparse
import textwrap
import difflib
import shutil
import sys
import os
import os.path
import io
import re
import csv

# Sentinel for "command-line option was not given at all" (as opposed to
# "given without a value", which argparse reports as None).
MISSING = object()

EXCLUDED_HEADERS = {
    "bytes_methods.h",
    "cellobject.h",
    "classobject.h",
    "code.h",
    "compile.h",
    "datetime.h",
    "dtoa.h",
    "frameobject.h",
    "funcobject.h",
    "genobject.h",
    "longintrepr.h",
    "parsetok.h",
    "pyatomic.h",
    "pytime.h",
    "token.h",
    "ucnhash.h",
}
MACOS = (sys.platform == "darwin")
UNIXY = MACOS or (sys.platform == "linux")  # XXX should this be "not Windows"?

# Human-readable documentation notes for each known feature define.
IFDEF_DOC_NOTES = {
    'MS_WINDOWS': 'on Windows',
    'HAVE_FORK': 'on platforms with fork()',
    'USE_STACKCHECK': 'on platforms with USE_STACKCHECK',
}

# The stable ABI manifest (Misc/stable_abi.txt) exists only to fill the
# following dataclasses.
# Feel free to change its syntax (and the `parse_manifest` function)
# to better serve that purpose (while keeping it human-readable).


@dataclasses.dataclass
class Manifest:
    """Collection of `ABIItem`s forming the stable ABI/limited API."""

    # Not a dataclass field (no annotation): lets the parser treat the
    # manifest like any other parent node with a `kind`.
    kind = 'manifest'
    # Maps item name -> ABIItem, in manifest order.
    contents: dict = dataclasses.field(default_factory=dict)

    def add(self, item):
        """Register `item` under its name.

        Raises ValueError if an item with the same name already exists.
        """
        if item.name in self.contents:
            # We assume that stable ABI items do not share names,
            # even if they're different kinds (e.g. function vs. macro).
            raise ValueError(f'duplicate ABI item {item.name}')
        self.contents[item.name] = item

    @property
    def feature_defines(self):
        """Return all feature defines which affect what's available

        These are e.g. HAVE_FORK and MS_WINDOWS.
        """
        return {
            item.ifdef for item in self.contents.values()
            if item.ifdef is not None
        }

    def select(self, kinds, *, include_abi_only=True, ifdef=None):
        """Yield selected items of the manifest, sorted by name

        kinds: set of requested kinds, e.g. {'function', 'macro'}
        include_abi_only: if True (default), include all items of the
            stable ABI.
            If False, include only items from the limited API
            (i.e. items people should use today)
        ifdef: set of feature defines (e.g. {'HAVE_FORK', 'MS_WINDOWS'}).
            If None (default), items are not filtered by this. (This is
            different from the empty set, which filters out all such
            conditional items.)
        """
        # Names are unique dict keys, so sorting the (name, item) pairs
        # never needs to compare the items themselves.
        for _name, item in sorted(self.contents.items()):
            if item.kind not in kinds:
                continue
            if item.abi_only and not include_abi_only:
                continue
            if (ifdef is not None
                    and item.ifdef is not None
                    and item.ifdef not in ifdef):
                continue
            yield item

    def dump(self):
        """Yield lines to recreate the manifest file (sans comments/newlines)"""
        # Recursive in preparation for struct member & function argument nodes
        for item in self.contents.values():
            yield from item.dump(indent=0)


@dataclasses.dataclass
class ABIItem:
    """Information on one item (function, macro, struct, etc.)"""

    kind: str
    name: str
    added: str = None
    contents: list = dataclasses.field(default_factory=list)
    abi_only: bool = False
    ifdef: str = None

    # Not a dataclass field (no annotation): the item kinds the manifest
    # syntax accepts.
    KINDS = frozenset({
        'struct', 'function', 'macro', 'data', 'const', 'typedef',
    })

    def dump(self, indent=0):
        """Yield manifest lines describing this item.

        The item line is indented by `indent` levels; its properties
        ("added", "ifdef", "abi_only") by one level more.
        """
        yield f"{'    ' * indent}{self.kind} {self.name}"
        if self.added:
            yield f"{'    ' * (indent+1)}added {self.added}"
        if self.ifdef:
            yield f"{'    ' * (indent+1)}ifdef {self.ifdef}"
        if self.abi_only:
            yield f"{'    ' * (indent+1)}abi_only"


def parse_manifest(file):
    """Parse the given file (iterable of lines) to a Manifest

    Each non-empty line has the form "<indent><kind> <content>"; anything
    after a '#' is a comment.  Nesting is expressed by indentation.

    Raises SyntaxError (with the offending line number) on malformed input,
    and ValueError (from `Manifest.add`) on duplicate item names.
    """

    LINE_RE = re.compile(
        r'(?P<indent>[ ]*)(?P<kind>[^ ]+)[ ]*(?P<content>.*)')
    manifest = Manifest()

    # parents of currently processed line, each with its indentation level
    levels = [(manifest, -1)]

    def raise_error(msg):
        # `lineno` is the loop variable of the enclosing `for` below.
        raise SyntaxError(f'line {lineno}: {msg}')

    for lineno, line in enumerate(file, start=1):
        # Strip the comment, then trailing whitespace (including the newline)
        line = line.partition('#')[0].rstrip()
        if not line:
            continue
        match = LINE_RE.fullmatch(line)
        if not match:
            raise_error(f'invalid syntax: {line}')
        level = len(match['indent'])
        kind = match['kind']
        content = match['content']
        # Drop entries that are not ancestors of this line.  The manifest
        # itself sits at level -1, so it is never popped.
        while level <= levels[-1][1]:
            levels.pop()
        parent = levels[-1][0]
        if parent is None:
            # The enclosing line is a property ("added", "ifdef",
            # "abi_only"); those are leaves and cannot have children.
            raise_error(f'{kind} cannot be nested under a property line')
        entry = None
        if kind in ABIItem.KINDS:
            if parent.kind not in {'manifest'}:
                raise_error(f'{kind} cannot go in {parent.kind}')
            entry = ABIItem(kind, content)
            parent.add(entry)
        elif kind in {'added', 'ifdef'}:
            if parent.kind not in ABIItem.KINDS:
                raise_error(f'{kind} cannot go in {parent.kind}')
            setattr(parent, kind, content)
        elif kind in {'abi_only'}:
            if parent.kind not in {'function', 'data'}:
                raise_error(f'{kind} cannot go in {parent.kind}')
            parent.abi_only = True
        else:
            raise_error(f"unknown kind {kind!r}")
        # Property lines are recorded with a None entry so that sibling
        # properties at the same indentation still resolve to the item.
        levels.append((entry, level))
    return manifest

# The tool can run individual "actions".
# Most actions are "generators", which generate a single file from the
# manifest. (Checking works by generating a temp file & comparing.)
# Other actions, like "--unixy-check", don't work on a single file.

generators = []


def generator(var_name, default_path):
    """Decorates a file generator: function that writes to a file

    var_name: attribute name the chosen filename is stored under in the
        parsed command-line arguments; also determines the option name
        (underscores become dashes, prefixed with "--").
    default_path: default output path, joined onto the base path by `main`.

    The decorated function is registered in the `generators` list.
    """
    def _decorator(func):
        func.var_name = var_name
        func.arg_name = '--' + var_name.replace('_', '-')
        func.default_path = default_path
        generators.append(func)
        return func
    return _decorator


@generator("python3dll", 'PC/python3dll.c')
def gen_python3dll(manifest, args, outfile):
    """Generate/check the source for the Windows stable ABI library"""
    write = partial(print, file=outfile)
    write(textwrap.dedent(r"""
        /* Re-export stable Python ABI */

        /* Generated by Tools/scripts/stable_abi.py */

        #ifdef _M_IX86
        #define DECORATE "_"
        #else
        #define DECORATE
        #endif

        #define EXPORT_FUNC(name) \
            __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name))
        #define EXPORT_DATA(name) \
            __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name ",DATA"))
    """))

    def sort_key(item):
        # Case-insensitive ordering of the export list
        return item.name.lower()

    for item in sorted(
            manifest.select(
                {'function'}, include_abi_only=True, ifdef={'MS_WINDOWS'}),
            key=sort_key):
        write(f'EXPORT_FUNC({item.name})')

    write()

    for item in sorted(
            manifest.select(
                {'data'}, include_abi_only=True, ifdef={'MS_WINDOWS'}),
            key=sort_key):
        write(f'EXPORT_DATA({item.name})')


# Maps manifest item kinds to the role name written to the documentation
# data file.
REST_ROLES = {
    'function': 'function',
    'data': 'var',
    'struct': 'type',
    'macro': 'macro',
    # 'const': 'const',  # all undocumented
    'typedef': 'type',
}


@generator("doc_list", 'Doc/data/stable_abi.dat')
def gen_doc_annotations(manifest, args, outfile):
    """Generate/check the stable ABI list for documentation annotations"""
    writer = csv.DictWriter(
        outfile, ['role', 'name', 'added', 'ifdef_note'], lineterminator='\n')
    writer.writeheader()
    for item in manifest.select(REST_ROLES.keys(), include_abi_only=False):
        if item.ifdef:
            # Raises KeyError for a feature define without a doc note
            ifdef_note = IFDEF_DOC_NOTES[item.ifdef]
        else:
            ifdef_note = None
        writer.writerow({
            'role': REST_ROLES[item.kind],
            'name': item.name,
            'added': item.added,
            'ifdef_note': ifdef_note})


def generate_or_check(manifest, args, path, func):
    """Generate/check a file with a single generator

    manifest, args: passed through to `func`.
    path: file to check or (with `args.generate`) to write; may be a `str`
        or a `pathlib.Path`.
    func: generator called as `func(manifest, args, outfile)`.

    Return True if successful; False if a comparison failed.
    """
    # Filenames given on the command line arrive as plain strings.
    path = Path(path)

    outfile = io.StringIO()
    func(manifest, args, outfile)
    generated = outfile.getvalue()
    try:
        existing = path.read_text(encoding='utf-8')
    except FileNotFoundError:
        # Not an error when generating: the file is simply created.
        existing = None

    if generated == existing:
        return True

    if args.generate:
        path.write_text(generated, encoding='utf-8')
        return True

    if existing is None:
        print(f'File {path} does not exist!')
        return False

    print(f'File {path} differs from expected!')
    # Diff from the file on disk to the freshly generated ("expected")
    # content, so the labels match the sides they describe.
    diff = difflib.unified_diff(
        existing.splitlines(), generated.splitlines(),
        str(path), '<expected>',
        lineterm='',
    )
    for line in diff:
        print(line)
    return False


def do_unixy_check(manifest, args):
    """Check headers & library using "Unixy" tools (GCC/clang, binutils)"""
    okay = True

    # Get all macros first: we'll need feature macros like HAVE_FORK and
    # MS_WINDOWS for everything else
    present_macros = gcc_get_limited_api_macros(['Include/Python.h'])
    feature_defines = manifest.feature_defines & present_macros

    # Check that we have all needed macros
    expected_macros = set(
        item.name for item in manifest.select({'macro'})
    )
    missing_macros = expected_macros - present_macros
    okay &= _report_unexpected_items(
        missing_macros,
        'Some macros are not defined from "Include/Python.h" '
        + 'with Py_LIMITED_API:')

    expected_symbols = set(item.name for item in manifest.select(
        {'function', 'data'}, include_abi_only=True, ifdef=feature_defines,
    ))

    # Check the static library (*.a)
    LIBRARY = sysconfig.get_config_var("LIBRARY")
    if not LIBRARY:
        raise Exception("failed to get LIBRARY variable from sysconfig")
    if os.path.exists(LIBRARY):
        okay &= binutils_check_library(
            manifest, LIBRARY, expected_symbols, dynamic=False)

    # Check the dynamic library (*.so)
    # NOTE(review): this passes dynamic=False, same as the static check.
    # LDLIBRARY may itself be a static archive on some builds, so this is
    # left unchanged -- confirm whether `nm --dynamic` was intended here.
    LDLIBRARY = sysconfig.get_config_var("LDLIBRARY")
    if not LDLIBRARY:
        raise Exception("failed to get LDLIBRARY variable from sysconfig")
    okay &= binutils_check_library(
        manifest, LDLIBRARY, expected_symbols, dynamic=False)

    # Check definitions in the header files
    expected_defs = set(item.name for item in manifest.select(
        {'function', 'data'}, include_abi_only=False, ifdef=feature_defines,
    ))
    found_defs = gcc_get_limited_api_definitions(['Include/Python.h'])
    missing_defs = expected_defs - found_defs
    okay &= _report_unexpected_items(
        missing_defs,
        'Some expected declarations were not declared in '
        + '"Include/Python.h" with Py_LIMITED_API:')

    # Some Limited API macros are defined in terms of private symbols.
    # These are not part of Limited API (even though they're defined with
    # Py_LIMITED_API). They must be part of the Stable ABI, though.
    private_symbols = {n for n in expected_symbols if n.startswith('_')}
    extra_defs = found_defs - expected_defs - private_symbols
    okay &= _report_unexpected_items(
        extra_defs,
        'Some extra declarations were found in "Include/Python.h" '
        + 'with Py_LIMITED_API:')

    return okay


def _report_unexpected_items(items, msg):
    """If there are any `items`, report them using "msg" and return false"""
    if items:
        print(msg, file=sys.stderr)
        for item in sorted(items):
            print(' -', item, file=sys.stderr)
        return False
    return True


def binutils_get_exported_symbols(library, dynamic=False):
    """Retrieve exported symbols using the nm(1) tool from binutils

    Yields symbol names found in `library`.  If `nm` fails, its output is
    echoed and the process exits with nm's return code.
    """
    args = ["nm", "--no-sort"]
    if dynamic:
        # Only look at dynamic symbols
        args.append("--dynamic")
    args.append(library)
    proc = subprocess.run(args, stdout=subprocess.PIPE, text=True)
    if proc.returncode:
        sys.stdout.write(proc.stdout)
        sys.exit(proc.returncode)

    stdout = proc.stdout.rstrip()
    if not stdout:
        raise Exception("command output is empty")

    for line in stdout.splitlines():
        # Split line '0000000000001b80 D PyTextIOWrapper_Type'
        if not line:
            continue

        parts = line.split(maxsplit=2)
        if len(parts) < 3:
            # Lines without all three columns are skipped
            continue

        symbol = parts[-1]
        if MACOS and symbol.startswith("_"):
            # On macOS one leading underscore is stripped from each name
            yield symbol[1:]
        else:
            yield symbol


def binutils_check_library(manifest, library, expected_symbols, dynamic):
    """Check that library exports all expected_symbols

    Return True if nothing is missing; otherwise report the missing symbols
    on stderr and return False.
    """
    available_symbols = set(binutils_get_exported_symbols(library, dynamic))
    missing_symbols = expected_symbols - available_symbols
    if missing_symbols:
        # Sorted so the report is stable between runs
        print(textwrap.dedent(f"""\
            Some symbols from the limited API are missing from {library}:
                {', '.join(sorted(missing_symbols))}

            This error means that there are some missing symbols among the
            ones exported in the library.
            This normally means that some symbol, function implementation or
            a prototype belonging to a symbol in the limited API has been
            deleted or is missing.
        """), file=sys.stderr)
        return False
    return True


def _gcc_preprocess(headers, extra_args, **kwargs):
    """Return the preprocessor output for `headers` with Py_LIMITED_API set

    extra_args: additional compiler arguments, placed before the common ones.
    kwargs: passed on to `subprocess.check_output` (e.g. `stderr`).

    Raises Exception if sysconfig does not provide the CC variable.
    """
    cc = sysconfig.get_config_var("CC")
    if not cc:
        raise Exception("failed to get CC variable from sysconfig")

    api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16

    return subprocess.check_output(
        cc.split()
        + list(extra_args)
        + [
            "-DSIZEOF_WCHAR_T=4",  # The actual value is not important
            f"-DPy_LIMITED_API={api_hexversion}",
            "-I.",
            "-I./Include",
            "-E",
        ]
        + [str(file) for file in headers],
        text=True,
        **kwargs,
    )


def gcc_get_limited_api_macros(headers):
    """Get all limited API macros from headers.

    Runs the preprocessor over all the header files in "Include" setting
    "-DPy_LIMITED_API" to the correct value for the running version of the
    interpreter and extracting all macro definitions (via adding -dM to the
    compiler arguments).

    Requires Python built with a GCC-compatible compiler. (clang might work)
    """
    preprocesor_output_with_macros = _gcc_preprocess(headers, ["-dM"])

    return set(re.findall(r"#define (\w+)", preprocesor_output_with_macros))


def gcc_get_limited_api_definitions(headers):
    """Get all limited API definitions from headers.

    Run the preprocessor over all the header files in "Include" setting
    "-DPy_LIMITED_API" to the correct value for the running version of the
    interpreter.

    The limited API symbols will be extracted from the output of this command
    as it includes the prototypes and definitions of all the exported symbols
    that are in the limited api.

    This function does *NOT* extract the macros defined on the limited API

    Requires Python built with a GCC-compatible compiler. (clang might work)
    """
    preprocesor_output = _gcc_preprocess(
        headers,
        [
            # Prevent the expansion of the exported macros so we can capture
            # them later
            "-DPyAPI_FUNC=__PyAPI_FUNC",
            "-DPyAPI_DATA=__PyAPI_DATA",
            "-DEXPORT_DATA=__EXPORT_DATA",
            "-D_Py_NO_RETURN=",
        ],
        stderr=subprocess.DEVNULL,
    )
    stable_functions = set(
        re.findall(r"__PyAPI_FUNC\(.*?\)\s*(.*?)\s*\(", preprocesor_output)
    )
    stable_exported_data = set(
        re.findall(r"__EXPORT_DATA\((.*?)\)", preprocesor_output)
    )
    stable_data = set(
        re.findall(r"__PyAPI_DATA\(.*?\)[\s\*\(]*([^);]*)\)?.*;", preprocesor_output)
    )
    return stable_data | stable_exported_data | stable_functions


def check_private_names(manifest):
    """Ensure limited API doesn't contain private names

    Names prefixed by an underscore are private by definition.

    Raises ValueError for the first private name not marked `abi_only`.
    """
    for name, item in manifest.contents.items():
        if name.startswith('_') and not item.abi_only:
            raise ValueError(
                f'`{name}` is private (underscore-prefixed) and should be '
                + 'removed from the stable ABI list or marked `abi_only`')


def main():
    """Parse the command line, run the requested actions, report failures

    Raises Exception if any action fails; exits through `parser.error` if no
    action was requested.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "file", type=Path, metavar='FILE',
        help="file with the stable abi manifest",
    )
    parser.add_argument(
        "--generate", action='store_true',
        help="generate file(s), rather than just checking them",
    )
    parser.add_argument(
        "--generate-all", action='store_true',
        help="as --generate, but generate all file(s) using default filenames."
            + " (unlike --all, does not run any extra checks)",
    )
    parser.add_argument(
        "-a", "--all", action='store_true',
        help="run all available checks using default filenames",
    )
    parser.add_argument(
        "-l", "--list", action='store_true',
        help="list available generators and their default filenames; then exit",
    )
    parser.add_argument(
        "--dump", action='store_true',
        help="dump the manifest contents (used for debugging the parser)",
    )

    actions_group = parser.add_argument_group('actions')
    for gen in generators:
        # default=MISSING: option absent; None: option given without FILENAME
        actions_group.add_argument(
            gen.arg_name, dest=gen.var_name,
            type=str, nargs="?", default=MISSING,
            metavar='FILENAME',
            help=gen.__doc__,
        )
    actions_group.add_argument(
        '--unixy-check', action='store_true',
        help=do_unixy_check.__doc__,
    )
    args = parser.parse_args()

    # Default filenames are resolved against the directory two levels above
    # the manifest file.
    base_path = args.file.parent.parent

    if args.list:
        for gen in generators:
            print(f'{gen.arg_name}: {base_path / gen.default_path}')
        sys.exit(0)

    run_all_generators = args.generate_all

    if args.generate_all:
        args.generate = True

    if args.all:
        run_all_generators = True
        args.unixy_check = True

    with args.file.open() as file:
        manifest = parse_manifest(file)

    check_private_names(manifest)

    # Remember results of all actions (as booleans).
    # At the end we'll check that at least one action was run,
    # and also fail if any are false.
    results = {}

    if args.dump:
        for line in manifest.dump():
            print(line)
        results['dump'] = True

    for gen in generators:
        filename = getattr(args, gen.var_name)
        if filename is None or (run_all_generators and filename is MISSING):
            filename = base_path / gen.default_path
        elif filename is MISSING:
            continue

        results[gen.var_name] = generate_or_check(manifest, args, filename, gen)

    if args.unixy_check:
        results['unixy_check'] = do_unixy_check(manifest, args)

    if not results:
        if args.generate:
            parser.error('No file specified. Use --help for usage.')
        parser.error('No check specified. Use --help for usage.')

    failed_results = [name for name, result in results.items() if not result]

    if failed_results:
        raise Exception(textwrap.dedent(f"""
        These checks related to the stable ABI did not succeed:
            {', '.join(failed_results)}

        If you see diffs in the output, files derived from the stable
        ABI manifest were not regenerated.
        Run `make regen-limited-abi` to fix this.

        Otherwise, see the error(s) above.

        The stable ABI manifest is at: {args.file}
        Note that there is a process to follow when modifying it.

        You can read more about the limited API and its contracts at:

        https://docs.python.org/3/c-api/stable.html

        And in PEP 384:

        https://www.python.org/dev/peps/pep-0384/
        """))


if __name__ == "__main__":
    main()