• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import contextlib
2import io
3import os.path
4import re
5
# Path of this script as written into the START marker of every generated
# section, so readers of the generated headers can find the tool.
SCRIPT_NAME = 'Tools/build/generate_global_objects.py'
# Make the path absolute so ROOT does not depend on the working directory.
__file__ = os.path.abspath(__file__)
# Three directory levels above this file (SCRIPT_NAME places the script in
# Tools/build/, so this is presumably the repository root).
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
# Directory containing the headers rewritten by the generate_*() functions.
INTERNAL = os.path.join(ROOT, 'Include', 'internal')
10
11
# Names that match the _Py_ID(<name>) pattern in the scanned sources but are
# skipped by get_identifiers_and_strings() instead of being collected.
# NOTE(review): these look like macro parameter names used in the listed
# files rather than real identifiers -- confirm against those files.
IGNORED = {
    'ACTION',  # Python/_warnings.c
    'ATTR',  # Python/_warnings.c and Objects/funcobject.c
    'DUNDER',  # Objects/typeobject.c
    'RDUNDER',  # Objects/typeobject.c
    'SPECIAL',  # Objects/weakrefobject.c
    'NAME',  # Objects/typeobject.c
}
# Identifiers that are always generated, in addition to whatever the source
# scan finds: get_identifiers_and_strings() seeds its result set from this
# list.  The group comments name the C macro each batch is attributed to.
IDENTIFIERS = [
    # from ADD() Python/_warnings.c
    'default',
    'ignore',

    # from GET_WARNINGS_ATTR() in Python/_warnings.c
    'WarningMessage',
    '_showwarnmsg',
    '_warn_unawaited_coroutine',
    'defaultaction',
    'filters',
    'onceregistry',

    # from WRAP_METHOD() in Objects/weakrefobject.c
    '__bytes__',
    '__reversed__',

    # from COPY_ATTR() in Objects/funcobject.c
    '__module__',
    '__name__',
    '__qualname__',
    '__doc__',
    '__annotations__',

    # from SLOT* in Objects/typeobject.c
    '__abs__',
    '__add__',
    '__aiter__',
    '__and__',
    '__anext__',
    '__await__',
    '__bool__',
    '__call__',
    '__contains__',
    '__del__',
    '__delattr__',
    '__delete__',
    '__delitem__',
    '__eq__',
    '__float__',
    '__floordiv__',
    '__ge__',
    '__get__',
    '__getattr__',
    '__getattribute__',
    '__getitem__',
    '__gt__',
    '__hash__',
    '__iadd__',
    '__iand__',
    '__ifloordiv__',
    '__ilshift__',
    '__imatmul__',
    '__imod__',
    '__imul__',
    '__index__',
    '__init__',
    '__int__',
    '__invert__',
    '__ior__',
    '__ipow__',
    '__irshift__',
    '__isub__',
    '__iter__',
    '__itruediv__',
    '__ixor__',
    '__le__',
    '__len__',
    '__lshift__',
    '__lt__',
    '__matmul__',
    '__mod__',
    '__mul__',
    '__ne__',
    '__neg__',
    '__new__',
    '__next__',
    '__or__',
    '__pos__',
    '__pow__',
    '__radd__',
    '__rand__',
    '__repr__',
    '__rfloordiv__',
    '__rlshift__',
    '__rmatmul__',
    '__rmod__',
    '__rmul__',
    '__ror__',
    '__rpow__',
    '__rrshift__',
    '__rshift__',
    '__rsub__',
    '__rtruediv__',
    '__rxor__',
    '__set__',
    '__setattr__',
    '__setitem__',
    '__str__',
    '__sub__',
    '__truediv__',
    '__xor__',
    '__divmod__',
    '__rdivmod__',
    '__buffer__',
    '__release_buffer__',

    # Workarounds for GH-108918
    'alias',
    'args',
    'exc_type',
    'exc_value',
    'self',
    'traceback',
]
135
# C expressions for objects that generate_global_object_finalizers() checks
# in its "non-generated" section, after the references returned by
# generate_runtime_init().  Each entry is pasted verbatim into a
# _PyStaticObject_CheckRefcnt(...) call.
NON_GENERATED_IMMORTAL_OBJECTS = [
    # The generated ones come from generate_runtime_init().
    '(PyObject *)&_Py_SINGLETON(bytes_empty)',
    '(PyObject *)&_Py_SINGLETON(tuple_empty)',
    '(PyObject *)&_Py_SINGLETON(hamt_bitmap_node_empty)',
    '(PyObject *)&_Py_INTERP_SINGLETON(interp, hamt_empty)',
    '(PyObject *)&_Py_SINGLETON(context_token_missing)',
]
144
145
146#######################################
147# helpers
148
def iter_files():
    """Yield the path of every ``.c`` and ``.h`` file to be scanned.

    Walks the Modules, Objects, Parser, PC, Programs and Python directories
    under ROOT recursively, in that order.
    """
    top_level_dirs = ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python')
    for top in top_level_dirs:
        for dirpath, _subdirs, filenames in os.walk(os.path.join(ROOT, top)):
            for filename in filenames:
                if filename.endswith(('.c', '.h')):
                    yield os.path.join(dirpath, filename)
157
158
def iter_global_strings():
    """Yield every ``_Py_ID()`` / ``_Py_DECLARE_STR()`` use in the sources.

    Each item is a 5-tuple ``(name, string, filename, lineno, line)``.
    ``string`` is None for a ``_Py_ID(name)`` match and the literal text
    (exactly as written between the double quotes) for a
    ``_Py_DECLARE_STR(name, "...")`` match.  On any given line all
    ``_Py_ID`` matches are yielded before the ``_Py_DECLARE_STR`` ones.
    """
    id_pattern = re.compile(r'\b_Py_ID\((\w+)\)')
    str_pattern = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
    for path in iter_files():
        try:
            source = open(path, encoding='utf-8')
        except FileNotFoundError:
            # The file must have been a temporary file.
            continue
        with source:
            for lineno, text in enumerate(source, start=1):
                for match in id_pattern.finditer(text):
                    yield match.group(1), None, path, lineno, text
                for match in str_pattern.finditer(text):
                    yield match.group(1), match.group(2), path, lineno, text
176
177
def iter_to_marker(lines, marker):
    """Yield items from *lines* until one equals *marker*.

    Trailing whitespace is ignored when comparing against *marker*.  The
    marker line is consumed from *lines* but not yielded, so when *lines*
    is an iterator it is left positioned just after the marker.
    """
    for candidate in lines:
        if candidate.rstrip() != marker:
            yield candidate
            continue
        return
183
184
class Printer:
    """Write indented lines, optionally as backslash-continued macro bodies.

    Every line is prefixed with four spaces per indentation level.  While
    the innermost continuation flag is true, each line is terminated with
    a backslash before the newline.
    """

    def __init__(self, file):
        # Current indentation depth, in units of four spaces.
        self.level = 0
        # Text stream that receives the output.
        self.file = file
        # Stack of continuation flags; the last entry applies to write().
        self.continuation = [False]

    @contextlib.contextmanager
    def indent(self):
        """Indent one extra level for the duration of the ``with`` body."""
        save_level = self.level
        try:
            self.level += 1
            yield
        finally:
            self.level = save_level

    def write(self, arg):
        """Write *arg* as one line at the current indentation level."""
        eol = '\n'
        if self.continuation[-1]:
            # A non-empty line gets a space before the backslash.
            eol = f' \\{eol}' if arg else f'\\{eol}'
        self.file.writelines(("    "*self.level, arg, eol))

    @contextlib.contextmanager
    def block(self, prefix, suffix="", *, continuation=None):
        """Write ``prefix {``, an indented body, then ``}suffix``.

        *continuation* selects whether lines inside the block (including
        the opening line) end with a backslash; None inherits the current
        setting.  The closing line uses the enclosing setting.

        If the body raises, the closing line is not written, but the
        continuation and indentation state are restored so the printer
        stays usable.
        """
        if continuation is None:
            continuation = self.continuation[-1]
        self.continuation.append(continuation)
        try:
            self.write(prefix + " {")
            with self.indent():
                yield
        finally:
            # Always unwind, mirroring indent(); otherwise an exception
            # would leave later writes using this block's flag.
            self.continuation.pop()
        self.write("}" + suffix)
218
219
@contextlib.contextmanager
def open_for_changes(filename, orig):
    """Like open() for writing, but leave the file alone if nothing changed.

    Yields an in-memory text buffer.  When the ``with`` body finishes, the
    buffer's contents are compared with *orig*: if they differ, *filename*
    is overwritten (UTF-8); otherwise a "not changed" note is printed and
    the file is not touched.
    """
    buffer = io.StringIO()
    yield buffer
    generated = buffer.getvalue()
    if generated == orig:
        print(f'# not changed: {filename}')
        return
    with open(filename, 'w', encoding='utf-8') as target:
        target.write(generated)
231
232
233#######################################
234# the global objects
235
# Marker lines delimiting the generated section of each target header.
# The generate_*() functions keep everything before START and after END,
# and replace what lies between them.
START = f'/* The following is auto-generated by {SCRIPT_NAME}. */'
END = '/* End auto-generated code */'
238
239
def generate_global_strings(identifiers, strings):
    """Regenerate the generated section of ``pycore_global_strings.h``.

    Text outside the START/END markers is preserved.  The file is only
    rewritten if its content actually changes.

    identifiers: iterable of identifier names; each must satisfy
        ``str.isidentifier()`` (checked with an assert).
    strings: mapping of string literal text -> C variable name; entries
        are emitted sorted by variable name.
    """
    filename = os.path.join(INTERNAL, 'pycore_global_strings.h')

    # Read the non-generated part of the file.
    # Decode explicitly as UTF-8: open_for_changes() writes UTF-8, so
    # relying on the locale default could make the "unchanged" comparison
    # wrong (or fail to decode) on platforms with another default.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('struct _Py_global_strings', ';'):
            with printer.block('struct', ' literals;'):
                for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                    printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
            outfile.write('\n')
            with printer.block('struct', ' identifiers;'):
                for name in sorted(identifiers):
                    assert name.isidentifier(), name
                    printer.write(f'STRUCT_FOR_ID({name})')
            with printer.block('struct', ' ascii[128];'):
                printer.write("PyASCIIObject _ascii;")
                printer.write("uint8_t _data[2];")
            with printer.block('struct', ' latin1[128];'):
                printer.write("PyCompactUnicodeObject _latin1;")
                printer.write("uint8_t _data[2];")
        printer.write(END)
        printer.write(after)
274
275
def generate_runtime_init(identifiers, strings):
    """Regenerate the generated section of ``pycore_runtime_init_generated.h``.

    Emits the ``_Py_*_INIT`` macros for small ints, single bytes, string
    literals, identifiers, and the ASCII / Latin-1 single-character strings.
    Text outside the START/END markers is preserved, and the file is only
    rewritten if its content actually changes.

    identifiers: iterable of identifier names; each must satisfy
        ``str.isidentifier()`` (checked with an assert).
    strings: mapping of string literal text -> C variable name; entries
        are emitted sorted by variable name.

    Returns a list of C expressions (str), one per object whose
    initializer was emitted, in emission order.

    Raises NotImplementedError if ``pycore_global_objects.h`` has no
    ``#define _PY_NSMALLNEGINTS`` line.
    """
    # First get some info from the declarations.
    nsmallposints = None
    nsmallnegints = None
    # Decode explicitly as UTF-8 rather than the locale default, matching
    # how the generated files are written.
    with open(os.path.join(INTERNAL, 'pycore_global_objects.h'),
              encoding='utf-8') as infile:
        for line in infile:
            if line.startswith('#define _PY_NSMALLPOSINTS'):
                nsmallposints = int(line.split()[-1])
            elif line.startswith('#define _PY_NSMALLNEGINTS'):
                nsmallnegints = int(line.split()[-1])
                # Scanning stops here, so the POSINTS define has to come
                # first for the assert below to pass.
                break
        else:
            raise NotImplementedError
    assert nsmallposints and nsmallnegints

    # Then target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h')

    # Read the non-generated part of the file.
    # UTF-8 here too: open_for_changes() writes UTF-8 and compares the new
    # text against ``orig``.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        immortal_objects = []
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('#define _Py_small_ints_INIT', continuation=True):
            for i in range(-nsmallnegints, nsmallposints):
                printer.write(f'_PyLong_DIGIT_INIT({i}),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]')
        printer.write('')
        with printer.block('#define _Py_bytes_characters_INIT', continuation=True):
            for i in range(256):
                printer.write(f'_PyBytes_CHAR_INIT({i}),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]')
        printer.write('')
        with printer.block('#define _Py_str_literals_INIT', continuation=True):
            for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                printer.write(f'INIT_STR({name}, "{literal}"),')
                immortal_objects.append(f'(PyObject *)&_Py_STR({name})')
        printer.write('')
        with printer.block('#define _Py_str_identifiers_INIT', continuation=True):
            for name in sorted(identifiers):
                assert name.isidentifier(), name
                printer.write(f'INIT_ID({name}),')
                immortal_objects.append(f'(PyObject *)&_Py_ID({name})')
        printer.write('')
        with printer.block('#define _Py_str_ascii_INIT', continuation=True):
            for i in range(128):
                printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]')
        printer.write('')
        with printer.block('#define _Py_str_latin1_INIT', continuation=True):
            for i in range(128, 256):
                # Spell the character's UTF-8 encoding as \xNN escapes
                # inside a C string literal.
                utf8 = ['"']
                for c in chr(i).encode('utf-8'):
                    utf8.append(f"\\x{c:02x}")
                utf8.append('"')
                printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {"".join(utf8)}),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]')
        printer.write(END)
        printer.write(after)
        # Returning from inside the ``with`` still runs the context
        # manager's exit step, which is what writes the file.
        return immortal_objects
346
347
def generate_static_strings_initializer(identifiers, strings):
    """Regenerate the generated section of ``pycore_unicodeobject_generated.h``.

    Emits the C function ``_PyUnicode_InitStaticStrings()``, which interns
    every identifier (in sorted order) and then every declared string
    (sorted by literal text).  Text outside the START/END markers is
    preserved, and the file is only rewritten if its content changes.

    identifiers: iterable of identifier names.
    strings: mapping of string literal text -> C variable name.
    """
    # Target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_unicodeobject_generated.h')

    # Read the non-generated part of the file.
    # Decode explicitly as UTF-8: open_for_changes() writes UTF-8, so the
    # locale default must not be used for the comparison baseline.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        printer.write("static inline void")
        with printer.block("_PyUnicode_InitStaticStrings(PyInterpreterState *interp)"):
            printer.write('PyObject *string;')
            for i in sorted(identifiers):
                # This use of _Py_ID() is not picked up by
                # iter_global_strings(): the generated header lives under
                # Include/, which iter_files() does not walk.
                printer.write(f'string = &_Py_ID({i});')
                printer.write('_PyUnicode_InternStatic(interp, &string);')
                printer.write('assert(_PyUnicode_CheckConsistency(string, 1));')
                printer.write('assert(PyUnicode_GET_LENGTH(string) != 1);')
            for _literal, name in sorted(strings.items()):
                printer.write(f'string = &_Py_STR({name});')
                printer.write('_PyUnicode_InternStatic(interp, &string);')
                printer.write('assert(_PyUnicode_CheckConsistency(string, 1));')
                printer.write('assert(PyUnicode_GET_LENGTH(string) != 1);')
        printer.write(END)
        printer.write(after)
383
384
def generate_global_object_finalizers(generated_immortal_objects):
    """Regenerate the generated section of
    ``pycore_global_objects_fini_generated.h``.

    Emits the debug-only C function ``_PyStaticObjects_CheckRefcnt()``,
    which calls ``_PyStaticObject_CheckRefcnt()`` on each entry of
    *generated_immortal_objects* and then on each entry of
    NON_GENERATED_IMMORTAL_OBJECTS.  Text outside the START/END markers is
    preserved, and the file is only rewritten if its content changes.

    generated_immortal_objects: iterable of C expressions (str), as
        returned by generate_runtime_init().
    """
    # Target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_global_objects_fini_generated.h')

    # Read the non-generated part of the file.
    # Decode explicitly as UTF-8: open_for_changes() writes UTF-8, so the
    # locale default must not be used for the comparison baseline.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        printer.write('#ifdef Py_DEBUG')
        printer.write("static inline void")
        with printer.block(
                "_PyStaticObjects_CheckRefcnt(PyInterpreterState *interp)"):
            printer.write('/* generated runtime-global */')
            printer.write('// (see pycore_runtime_init_generated.h)')
            for ref in generated_immortal_objects:
                printer.write(f'_PyStaticObject_CheckRefcnt({ref});')
            printer.write('/* non-generated */')
            for ref in NON_GENERATED_IMMORTAL_OBJECTS:
                printer.write(f'_PyStaticObject_CheckRefcnt({ref});')
        printer.write('#endif  // Py_DEBUG')
        printer.write(END)
        printer.write(after)
417
418
def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
    """Collect the identifiers and declared strings used by the sources.

    Returns ``(identifiers, strings)``:

    - ``identifiers`` is IDENTIFIERS plus every ``_Py_ID(name)`` found by
      iter_global_strings() whose name is not in IGNORED.
    - ``strings`` maps the literal text of each ``_Py_DECLARE_STR()`` to
      its variable name.

    Raises ValueError if a one-character latin-1 string is declared, if
    the same literal is declared under two different names, or if a
    literal's text is also used as an identifier.
    """
    identifiers = set(IDENTIFIERS)
    strings = {}
    # Note that we store strings as they appear in C source, so the checks here
    # can be defeated, e.g.:
    # - "a" and "\x61" won't be reported as duplicate.
    # - "\n" appears as 2 characters.
    # Probably not worth adding a C string parser.
    for name, string, *_ in iter_global_strings():
        if string is None:
            if name not in IGNORED:
                identifiers.add(name)
            continue
        if len(string) == 1 and ord(string) < 256:
            # Give a nice message for common mistakes.
            # To cover tricky cases (like "\n") we also generate C asserts.
            raise ValueError(
                'do not use &_Py_ID or &_Py_STR for one-character latin-1 '
                + f'strings, use _Py_LATIN1_CHR instead: {string!r}')
        if string not in strings:
            strings[string] = name
        elif name != strings[string]:
            # ``strings`` is keyed by literal text, so the earlier name
            # must be looked up with ``string``; indexing with ``name``
            # raised KeyError instead of reporting the conflict.
            raise ValueError(
                f'string mismatch for {name!r} '
                f'({string!r} is already declared as {strings[string]!r})')
    overlap = identifiers & set(strings.keys())
    if overlap:
        raise ValueError(
            'do not use both _Py_ID and _Py_DECLARE_STR for the same string: '
            + repr(overlap))
    return identifiers, strings
448
449
450#######################################
451# the script
452
def main() -> None:
    """Scan the sources once, then regenerate each generated header."""
    ids, strs = get_identifiers_and_strings()

    generate_global_strings(ids, strs)
    # The finalizer header needs the object list that the runtime-init
    # generator produces.
    immortal_refs = generate_runtime_init(ids, strs)
    generate_static_strings_initializer(ids, strs)
    generate_global_object_finalizers(immortal_refs)
460
461
# Regenerate the headers only when executed as a script, not on import.
if __name__ == '__main__':
    main()
464