"""Check the stable ABI manifest or generate files from it

By default, the tool only checks existing files/libraries.
Pass --generate to recreate auto-generated files instead.

For actions that take a FILENAME, the filename can be left out to use a default
(relative to the manifest file, as they appear in the CPython codebase).
"""
9
from functools import partial
from pathlib import Path
import dataclasses
import subprocess
import sysconfig
import argparse
import textwrap
import difflib
import shutil
import sys
import os
import os.path
import io
import re
import csv
25
# Sentinel default for the per-generator command-line options.  It must be
# distinct from None: argparse stores None when an option is given without
# a FILENAME, which means "use the default path".
MISSING = object()

# NOTE(review): not referenced by any other code in this file; presumably
# headers that are not part of the limited API -- confirm before relying
# on it.
EXCLUDED_HEADERS = {
    "bytes_methods.h",
    "cellobject.h",
    "classobject.h",
    "code.h",
    "compile.h",
    "datetime.h",
    "dtoa.h",
    "frameobject.h",
    "funcobject.h",
    "genobject.h",
    "longintrepr.h",
    "parsetok.h",
    "pyatomic.h",
    "pytime.h",
    "token.h",
    "ucnhash.h",
}
MACOS = (sys.platform == "darwin")
UNIXY = MACOS or (sys.platform == "linux")  # XXX should this be "not Windows"?

# Human-readable notes for the feature defines that can guard an item;
# written to the `ifdef_note` column of the documentation data file.
IFDEF_DOC_NOTES = {
    'MS_WINDOWS': 'on Windows',
    'HAVE_FORK': 'on platforms with fork()',
    'USE_STACKCHECK': 'on platforms with USE_STACKCHECK',
}
54
# The stable ABI manifest (Misc/stable_abi.txt) exists only to fill the
# following dataclasses.
# Feel free to change its syntax (and the `parse_manifest` function)
# to better serve that purpose (while keeping it human-readable).

@dataclasses.dataclass
class Manifest:
    """Collection of `ABIItem`s forming the stable ABI/limited API."""

    # Plain class attribute (no annotation, so not a dataclass field).
    # The parser reads `.kind` on whatever node is the current parent,
    # and the manifest is the root parent.
    kind = 'manifest'
    # Maps item name -> item, in insertion order.
    contents: dict = dataclasses.field(default_factory=dict)

    def add(self, item):
        """Register `item` under `item.name`.

        Raises ValueError if an item with that name was already added.
        """
        if item.name in self.contents:
            # We assume that stable ABI items do not share names,
            # even if they're different kinds (e.g. function vs. macro).
            raise ValueError(f'duplicate ABI item {item.name}')
        self.contents[item.name] = item

    @property
    def feature_defines(self):
        """Return all feature defines which affect what's available

        These are e.g. HAVE_FORK and MS_WINDOWS.
        """
        # Unconditional items have `ifdef` set to None; leave those out.
        return {item.ifdef for item in self.contents.values()} - {None}

    def select(self, kinds, *, include_abi_only=True, ifdef=None):
        """Yield selected items of the manifest, ordered by name

        kinds: set of requested kinds, e.g. {'function', 'macro'}
        include_abi_only: if True (default), include all items of the
            stable ABI.
            If False, include only items from the limited API
            (i.e. items people should use today)
        ifdef: set of feature defines (e.g. {'HAVE_FORK', 'MS_WINDOWS'}).
            If None (default), items are not filtered by this. (This is
            different from the empty set, which filters out all such
            conditional items.)
        """
        for name in sorted(self.contents):
            item = self.contents[name]
            if item.kind not in kinds:
                continue
            if item.abi_only and not include_abi_only:
                continue
            if (ifdef is not None
                    and item.ifdef is not None
                    and item.ifdef not in ifdef):
                continue
            yield item

    def dump(self):
        """Yield lines to recreate the manifest file (sans comments/newlines)"""
        # Recursive in preparation for struct member & function argument nodes
        for item in self.contents.values():
            yield from item.dump(indent=0)
111
@dataclasses.dataclass
class ABIItem:
    """Information on one item (function, macro, struct, etc.)"""

    kind: str                   # one of KINDS
    name: str
    added: str = None           # content of the item's `added` line, if any
    contents: list = dataclasses.field(default_factory=list)
    abi_only: bool = False      # set by an `abi_only` line
    ifdef: str = None           # content of the item's `ifdef` line, if any

    # Item kinds recognised by the manifest parser (class attribute, not a
    # dataclass field).
    KINDS = frozenset({
        'struct', 'function', 'macro', 'data', 'const', 'typedef',
    })

    def dump(self, indent=0):
        """Yield manifest lines describing this item.

        The first line is "<kind> <name>" at `indent` levels (4 spaces
        each); set properties follow one level deeper.
        """
        outer = '    ' * indent
        inner = '    ' * (indent + 1)
        yield f"{outer}{self.kind} {self.name}"
        if self.added:
            yield f"{inner}added {self.added}"
        if self.ifdef:
            yield f"{inner}ifdef {self.ifdef}"
        if self.abi_only:
            yield f"{inner}abi_only"
135
def parse_manifest(file):
    """Parse the given file (iterable of lines) to a Manifest

    Everything after a `#` on a line is a comment; blank lines are skipped.
    Each remaining line is "<kind> <content>", indented with spaces.
    Item lines (see `ABIItem.KINDS`) go directly in the manifest; property
    lines (`added`, `ifdef`, `abi_only`) are indented under their item.

    Raises SyntaxError (with the line number) for malformed input, and
    ValueError (from `Manifest.add`) for duplicate item names.
    """

    LINE_RE = re.compile('(?P<indent>[ ]*)(?P<kind>[^ ]+)[ ]*(?P<content>.*)')
    manifest = Manifest()

    # parents of currently processed line, each with its indentation level
    levels = [(manifest, -1)]

    def raise_error(msg):
        # `lineno` is looked up when called, i.e. it is the current line
        raise SyntaxError(f'line {lineno}: {msg}')

    for lineno, line in enumerate(file, start=1):
        line = line.partition('#')[0].rstrip()
        if not line:
            continue
        match = LINE_RE.fullmatch(line)
        if not match:
            raise_error(f'invalid syntax: {line}')
        level = len(match['indent'])
        kind = match['kind']
        content = match['content']
        # Drop nodes that are not ancestors of this line
        while level <= levels[-1][1]:
            levels.pop()
        parent = levels[-1][0]
        if parent is None:
            # Property lines are pushed with a None entry; a line indented
            # below one has no valid parent.
            raise_error(f'{kind} cannot be nested under a property line')
        entry = None
        if kind in ABIItem.KINDS:
            if parent.kind not in {'manifest'}:
                raise_error(f'{kind} cannot go in {parent.kind}')
            entry = ABIItem(kind, content)
            parent.add(entry)
        elif kind in {'added', 'ifdef'}:
            if parent.kind not in ABIItem.KINDS:
                raise_error(f'{kind} cannot go in {parent.kind}')
            setattr(parent, kind, content)
        elif kind in {'abi_only'}:
            if parent.kind not in {'function', 'data'}:
                raise_error(f'{kind} cannot go in {parent.kind}')
            parent.abi_only = True
        else:
            raise_error(f"unknown kind {kind!r}")
        levels.append((entry, level))
    return manifest
180
# The tool can run individual "actions".
# Most actions are "generators", which generate a single file from the
# manifest. (Checking works by generating a temp file & comparing.)
# Other actions, like "--unixy-check", don't work on a single file.

# All functions registered with @generator, in definition order
generators = []

def generator(var_name, default_path):
    """Decorates a file generator: function that writes to a file

    var_name: name of the argparse attribute holding the output filename;
        the command-line option is derived from it ("a_b" -> "--a-b").
    default_path: output path used when no filename is given.

    The decorated function is returned unchanged apart from the added
    `var_name`, `arg_name` and `default_path` attributes, and is appended
    to `generators`.
    """
    def _decorator(func):
        func.var_name = var_name
        func.arg_name = '--' + var_name.replace('_', '-')
        func.default_path = default_path
        generators.append(func)
        return func
    return _decorator
196
197
@generator("python3dll", 'PC/python3dll.c')
def gen_python3dll(manifest, args, outfile):
    """Generate/check the source for the Windows stable ABI library"""
    write = partial(print, file=outfile)
    write(textwrap.dedent(r"""
        /* Re-export stable Python ABI */

        /* Generated by Tools/scripts/stable_abi.py */

        #ifdef _M_IX86
        #define DECORATE "_"
        #else
        #define DECORATE
        #endif

        #define EXPORT_FUNC(name) \
            __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name))
        #define EXPORT_DATA(name) \
            __pragma(comment(linker, "/EXPORT:" DECORATE #name "=" PYTHON_DLL_NAME "." #name ",DATA"))
    """))

    def sort_key(item):
        return item.name.lower()

    def write_exports(kind, macro):
        # One `macro(name)` line per item of `kind` available with
        # MS_WINDOWS, ordered case-insensitively by name.
        selected = manifest.select(
            {kind}, include_abi_only=True, ifdef={'MS_WINDOWS'})
        for item in sorted(selected, key=sort_key):
            write(f'{macro}({item.name})')

    write_exports('function', 'EXPORT_FUNC')
    write()
    write_exports('data', 'EXPORT_DATA')
235
# Maps manifest item kinds to the role name written to the `role` column
# of the documentation data file.  Kinds missing here are not listed.
REST_ROLES = {
    'function': 'function',
    'data': 'var',
    'struct': 'type',
    'macro': 'macro',
    # 'const': 'const',  # all undocumented
    'typedef': 'type',
}

@generator("doc_list", 'Doc/data/stable_abi.dat')
def gen_doc_annotations(manifest, args, outfile):
    """Generate/check the stable ABI list for documentation annotations"""
    writer = csv.DictWriter(
        outfile, ['role', 'name', 'added', 'ifdef_note'], lineterminator='\n')
    writer.writeheader()
    # Only the limited API is listed; ABI-only items are left out.
    for item in manifest.select(REST_ROLES.keys(), include_abi_only=False):
        # An ifdef without an entry in IFDEF_DOC_NOTES raises KeyError.
        ifdef_note = IFDEF_DOC_NOTES[item.ifdef] if item.ifdef else None
        writer.writerow({
            'role': REST_ROLES[item.kind],
            'name': item.name,
            'added': item.added,
            'ifdef_note': ifdef_note,
        })
261
def generate_or_check(manifest, args, path, func):
    """Generate/check a file with a single generator

    manifest, args: passed through to `func`; `args.generate` selects
        between regenerating the file and only checking it.
    path: the file to write or compare against (str or path-like).
    func: generator called as `func(manifest, args, outfile)`; it writes
        the expected content to the text stream `outfile`.

    Return True if successful; False if a comparison failed.
    When checking, a missing file raises FileNotFoundError.
    """
    # Filenames given on the command line arrive as plain strings.
    path = Path(path)

    outfile = io.StringIO()
    func(manifest, args, outfile)
    generated = outfile.getvalue()

    try:
        existing = path.read_text()
    except FileNotFoundError:
        if not args.generate:
            raise
        # Nothing to compare against yet; the file is created below.
        existing = None

    if generated == existing:
        return True

    if args.generate:
        path.write_text(generated)
        return True

    print(f'File {path} differs from expected!')
    # "From" side is the file on disk, "to" side is the generated content,
    # so `-` lines are stale and `+` lines are what the file should contain.
    diff = difflib.unified_diff(
        existing.splitlines(), generated.splitlines(),
        str(path), '<expected>',
        lineterm='',
    )
    for line in diff:
        print(line)
    return False
287
288
def do_unixy_check(manifest, args):
    """Check headers & library using "Unixy" tools (GCC/clang, binutils)"""
    okay = True

    # Get all macros first: we'll need feature macros like HAVE_FORK and
    # MS_WINDOWS for everything else
    present_macros = gcc_get_limited_api_macros(['Include/Python.h'])
    feature_defines = manifest.feature_defines & present_macros

    # Check that we have all needed macros
    expected_macros = {item.name for item in manifest.select({'macro'})}
    missing_macros = expected_macros - present_macros
    okay &= _report_unexpected_items(
        missing_macros,
        'Some macros are not defined from "Include/Python.h" '
        + 'with Py_LIMITED_API:')

    expected_symbols = {item.name for item in manifest.select(
        {'function', 'data'}, include_abi_only=True, ifdef=feature_defines,
    )}

    # Check the static library (*.a)
    LIBRARY = sysconfig.get_config_var("LIBRARY")
    if not LIBRARY:
        raise Exception("failed to get LIBRARY variable from sysconfig")
    if os.path.exists(LIBRARY):
        okay &= binutils_check_library(
            manifest, LIBRARY, expected_symbols, dynamic=False)

    # Check the dynamic library (*.so)
    # NOTE(review): this passes dynamic=False although the library is
    # described as dynamic, and unlike LIBRARY it is not checked for
    # existence first.  Left unchanged; confirm whether this is intended.
    LDLIBRARY = sysconfig.get_config_var("LDLIBRARY")
    if not LDLIBRARY:
        raise Exception("failed to get LDLIBRARY variable from sysconfig")
    okay &= binutils_check_library(
        manifest, LDLIBRARY, expected_symbols, dynamic=False)

    # Check definitions in the header files
    expected_defs = {item.name for item in manifest.select(
        {'function', 'data'}, include_abi_only=False, ifdef=feature_defines,
    )}
    found_defs = gcc_get_limited_api_definitions(['Include/Python.h'])
    missing_defs = expected_defs - found_defs
    okay &= _report_unexpected_items(
        missing_defs,
        'Some expected declarations were not declared in '
        + '"Include/Python.h" with Py_LIMITED_API:')

    # Some Limited API macros are defined in terms of private symbols.
    # These are not part of Limited API (even though they're defined with
    # Py_LIMITED_API). They must be part of the Stable ABI, though.
    private_symbols = {n for n in expected_symbols if n.startswith('_')}
    extra_defs = found_defs - expected_defs - private_symbols
    okay &= _report_unexpected_items(
        extra_defs,
        'Some extra declarations were found in "Include/Python.h" '
        + 'with Py_LIMITED_API:')

    return okay
349
350
351def _report_unexpected_items(items, msg):
352    """If there are any `items`, report them using "msg" and return false"""
353    if items:
354        print(msg, file=sys.stderr)
355        for item in sorted(items):
356            print(' -', item, file=sys.stderr)
357        return False
358    return True
359
360
def binutils_get_exported_symbols(library, dynamic=False):
    """Retrieve exported symbols using the nm(1) tool from binutils

    Yields the symbol name (last column) of every nm output line that has
    at least three columns.  On macOS one leading underscore is stripped
    from each name.

    If nm fails, its output is echoed and the process exits with nm's
    return code.  Raises Exception if nm prints nothing.
    """
    args = ["nm", "--no-sort"]
    if dynamic:
        # Only look at dynamic symbols
        args.append("--dynamic")
    args.append(library)
    proc = subprocess.run(args, stdout=subprocess.PIPE, universal_newlines=True)
    if proc.returncode:
        sys.stdout.write(proc.stdout)
        sys.exit(proc.returncode)

    stdout = proc.stdout.rstrip()
    if not stdout:
        raise Exception("command output is empty")

    for line in stdout.splitlines():
        # Split line '0000000000001b80 D PyTextIOWrapper_Type'
        if not line:
            continue

        parts = line.split(maxsplit=2)
        if len(parts) < 3:
            # Lines with fewer than three columns are skipped
            continue

        symbol = parts[-1]
        if MACOS and symbol.startswith("_"):
            yield symbol[1:]
        else:
            yield symbol
391
392
def binutils_check_library(manifest, library, expected_symbols, dynamic):
    """Check that library exports all expected_symbols

    library: path of the library file handed to nm.
    expected_symbols: set of symbol names that must be present.
    dynamic: passed on to `binutils_get_exported_symbols`.

    Return True if nothing is missing; otherwise print the missing symbols
    (sorted) to stderr and return False.  `manifest` is not used.
    """
    available_symbols = set(binutils_get_exported_symbols(library, dynamic))
    missing_symbols = expected_symbols - available_symbols
    if not missing_symbols:
        return True
    # Sorted so the report does not depend on set iteration order
    print(textwrap.dedent(f"""\
        Some symbols from the limited API are missing from {library}:
            {', '.join(sorted(missing_symbols))}

        This error means that there are some missing symbols among the
        ones exported in the library.
        This normally means that some symbol, function implementation or
        a prototype belonging to a symbol in the limited API has been
        deleted or is missing.
    """), file=sys.stderr)
    return False
410
411
def gcc_get_limited_api_macros(headers):
    """Get all limited API macros from headers.

    Runs the preprocessor over the given header files, setting
    "-DPy_LIMITED_API" to the correct value for the running version of the
    interpreter and extracting all macro definitions (via adding -dM to the
    compiler arguments).

    Returns the set of macro names found in `#define` lines of the output.

    Requires Python built with a GCC-compatible compiler. (clang might work)
    """

    api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16

    preprocessor_output_with_macros = subprocess.check_output(
        sysconfig.get_config_var("CC").split()
        + [
            # Prevent the expansion of the exported macros so we can
            # capture them later
            "-DSIZEOF_WCHAR_T=4",  # The actual value is not important
            f"-DPy_LIMITED_API={api_hexversion}",
            "-I.",
            "-I./Include",
            "-dM",
            "-E",
        ]
        + [str(file) for file in headers],
        text=True,
    )

    return set(re.findall(r"#define (\w+)", preprocessor_output_with_macros))
447
448
def gcc_get_limited_api_definitions(headers):
    """Get all limited API definitions from headers.

    Run the preprocessor over the given header files, setting
    "-DPy_LIMITED_API" to the correct value for the running version of the
    interpreter.

    The limited API symbols will be extracted from the output of this command
    as it includes the prototypes and definitions of all the exported symbols
    that are in the limited api.

    Returns the set of names found in PyAPI_FUNC, PyAPI_DATA and
    EXPORT_DATA declarations.

    This function does *NOT* extract the macros defined on the limited API

    Requires Python built with a GCC-compatible compiler. (clang might work)
    """
    api_hexversion = sys.version_info.major << 24 | sys.version_info.minor << 16
    preprocessor_output = subprocess.check_output(
        sysconfig.get_config_var("CC").split()
        + [
            # Prevent the expansion of the exported macros so we can capture
            # them later
            "-DPyAPI_FUNC=__PyAPI_FUNC",
            "-DPyAPI_DATA=__PyAPI_DATA",
            "-DEXPORT_DATA=__EXPORT_DATA",
            "-D_Py_NO_RETURN=",
            "-DSIZEOF_WCHAR_T=4",  # The actual value is not important
            f"-DPy_LIMITED_API={api_hexversion}",
            "-I.",
            "-I./Include",
            "-E",
        ]
        + [str(file) for file in headers],
        text=True,
        stderr=subprocess.DEVNULL,
    )
    # The export macros were renamed above, so they survive preprocessing
    # as `__PyAPI_FUNC(...)` etc. and can be matched textually.
    stable_functions = set(
        re.findall(r"__PyAPI_FUNC\(.*?\)\s*(.*?)\s*\(", preprocessor_output)
    )
    stable_exported_data = set(
        re.findall(r"__EXPORT_DATA\((.*?)\)", preprocessor_output)
    )
    stable_data = set(
        re.findall(r"__PyAPI_DATA\(.*?\)[\s\*\(]*([^);]*)\)?.*;", preprocessor_output)
    )
    return stable_data | stable_exported_data | stable_functions
494
def check_private_names(manifest):
    """Ensure limited API doesn't contain private names

    Names prefixed by an underscore are private by definition.

    Raises ValueError for the first underscore-prefixed item that is not
    marked `abi_only`; returns None otherwise.
    """
    for name, item in manifest.contents.items():
        if name.startswith('_') and not item.abi_only:
            raise ValueError(
                f'`{name}` is private (underscore-prefixed) and should be '
                + 'removed from the stable ABI list or marked `abi_only`')
505
def main():
    """Command-line entry point: parse the manifest and run the actions.

    Exits through `parser.error` when no action was requested, and raises
    Exception when any requested action reports failure.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "file", type=Path, metavar='FILE',
        help="file with the stable abi manifest",
    )
    parser.add_argument(
        "--generate", action='store_true',
        help="generate file(s), rather than just checking them",
    )
    parser.add_argument(
        "--generate-all", action='store_true',
        help="as --generate, but generate all file(s) using default filenames."
            + " (unlike --all, does not run any extra checks)",
    )
    parser.add_argument(
        "-a", "--all", action='store_true',
        help="run all available checks using default filenames",
    )
    parser.add_argument(
        "-l", "--list", action='store_true',
        help="list available generators and their default filenames; then exit",
    )
    parser.add_argument(
        "--dump", action='store_true',
        help="dump the manifest contents (used for debugging the parser)",
    )

    actions_group = parser.add_argument_group('actions')
    for gen in generators:
        # Three states per option: MISSING (not given), None (given without
        # a FILENAME -> use the default path), or the FILENAME string.
        actions_group.add_argument(
            gen.arg_name, dest=gen.var_name,
            type=str, nargs="?", default=MISSING,
            metavar='FILENAME',
            help=gen.__doc__,
        )
    actions_group.add_argument(
        '--unixy-check', action='store_true',
        help=do_unixy_check.__doc__,
    )
    args = parser.parse_args()

    # Default paths are relative to the grandparent directory of the
    # manifest file.
    base_path = args.file.parent.parent

    if args.list:
        for gen in generators:
            print(f'{gen.arg_name}: {base_path / gen.default_path}')
        sys.exit(0)

    run_all_generators = args.generate_all

    if args.generate_all:
        args.generate = True

    if args.all:
        run_all_generators = True
        args.unixy_check = True

    with args.file.open() as file:
        manifest = parse_manifest(file)

    check_private_names(manifest)

    # Remember results of all actions (as booleans).
    # At the end we'll check that at least one action was run,
    # and also fail if any are false.
    results = {}

    if args.dump:
        for line in manifest.dump():
            print(line)
        results['dump'] = True

    for gen in generators:
        filename = getattr(args, gen.var_name)
        if filename is MISSING and not run_all_generators:
            continue
        if filename is None or filename is MISSING:
            filename = base_path / gen.default_path
        else:
            # Given on the command line as a plain string
            filename = Path(filename)

        results[gen.var_name] = generate_or_check(manifest, args, filename, gen)

    if args.unixy_check:
        results['unixy_check'] = do_unixy_check(manifest, args)

    if not results:
        if args.generate:
            parser.error('No file specified. Use --help for usage.')
        parser.error('No check specified. Use --help for usage.')

    failed_results = [name for name, result in results.items() if not result]

    if failed_results:
        raise Exception(f"""
        These checks related to the stable ABI did not succeed:
            {', '.join(failed_results)}

        If you see diffs in the output, files derived from the stable
        ABI manifest were not regenerated.
        Run `make regen-limited-abi` to fix this.

        Otherwise, see the error(s) above.

        The stable ABI manifest is at: {args.file}
        Note that there is a process to follow when modifying it.

        You can read more about the limited API and its contracts at:

        https://docs.python.org/3/c-api/stable.html

        And in PEP 384:

        https://www.python.org/dev/peps/pep-0384/
        """)


if __name__ == "__main__":
    main()
627