"""Regenerate the auto-generated sections of the internal global-object headers.

The script scans the C sources for ``_Py_ID()`` and ``_Py_DECLARE_STR()``
uses and rewrites the text between the START and END markers of four headers
under ``Include/internal``:

- pycore_global_strings.h
- pycore_runtime_init_generated.h
- pycore_unicodeobject_generated.h
- pycore_global_objects_fini_generated.h

Text outside the markers is preserved, and a file is only rewritten when its
content actually changes.
"""

import contextlib
import io
import os.path
import re

SCRIPT_NAME = 'Tools/build/generate_global_objects.py'
__file__ = os.path.abspath(__file__)
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
INTERNAL = os.path.join(ROOT, 'Include', 'internal')


# Names that show up inside _Py_ID(...) only as macro parameters.
IGNORED = {
    'ACTION',  # Python/_warnings.c
    'ATTR',  # Python/_warnings.c and Objects/funcobject.c
    'DUNDER',  # Objects/typeobject.c
    'RDUNDER',  # Objects/typeobject.c
    'SPECIAL',  # Objects/weakrefobject.c
    'NAME',  # Objects/typeobject.c
}
# Identifiers that are always generated, in addition to the scanned ones.
IDENTIFIERS = [
    # from ADD() Python/_warnings.c
    'default',
    'ignore',

    # from GET_WARNINGS_ATTR() in Python/_warnings.c
    'WarningMessage',
    '_showwarnmsg',
    '_warn_unawaited_coroutine',
    'defaultaction',
    'filters',
    'onceregistry',

    # from WRAP_METHOD() in Objects/weakrefobject.c
    '__bytes__',
    '__reversed__',

    # from COPY_ATTR() in Objects/funcobject.c
    '__module__',
    '__name__',
    '__qualname__',
    '__doc__',
    '__annotations__',

    # from SLOT* in Objects/typeobject.c
    '__abs__',
    '__add__',
    '__aiter__',
    '__and__',
    '__anext__',
    '__await__',
    '__bool__',
    '__call__',
    '__contains__',
    '__del__',
    '__delattr__',
    '__delete__',
    '__delitem__',
    '__eq__',
    '__float__',
    '__floordiv__',
    '__ge__',
    '__get__',
    '__getattr__',
    '__getattribute__',
    '__getitem__',
    '__gt__',
    '__hash__',
    '__iadd__',
    '__iand__',
    '__ifloordiv__',
    '__ilshift__',
    '__imatmul__',
    '__imod__',
    '__imul__',
    '__index__',
    '__init__',
    '__int__',
    '__invert__',
    '__ior__',
    '__ipow__',
    '__irshift__',
    '__isub__',
    '__iter__',
    '__itruediv__',
    '__ixor__',
    '__le__',
    '__len__',
    '__lshift__',
    '__lt__',
    '__matmul__',
    '__mod__',
    '__mul__',
    '__ne__',
    '__neg__',
    '__new__',
    '__next__',
    '__or__',
    '__pos__',
    '__pow__',
    '__radd__',
    '__rand__',
    '__repr__',
    '__rfloordiv__',
    '__rlshift__',
    '__rmatmul__',
    '__rmod__',
    '__rmul__',
    '__ror__',
    '__rpow__',
    '__rrshift__',
    '__rshift__',
    '__rsub__',
    '__rtruediv__',
    '__rxor__',
    '__set__',
    '__setattr__',
    '__setitem__',
    '__str__',
    '__sub__',
    '__truediv__',
    '__xor__',
    '__divmod__',
    '__rdivmod__',
    '__buffer__',
    '__release_buffer__',

    # Workarounds for GH-108918
    'alias',
    'args',
    'exc_type',
    'exc_value',
    'self',
    'traceback',
]

NON_GENERATED_IMMORTAL_OBJECTS = [
    # The generated ones come from generate_runtime_init().
    '(PyObject *)&_Py_SINGLETON(bytes_empty)',
    '(PyObject *)&_Py_SINGLETON(tuple_empty)',
    '(PyObject *)&_Py_SINGLETON(hamt_bitmap_node_empty)',
    '(PyObject *)&_Py_INTERP_SINGLETON(interp, hamt_empty)',
    '(PyObject *)&_Py_SINGLETON(context_token_missing)',
]


#######################################
# helpers

def iter_files():
    """Yield the path of every .c/.h file under the scanned source dirs."""
    for subdir in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'):
        root = os.path.join(ROOT, subdir)
        for dirname, _, files in os.walk(root):
            for basename in files:
                if basename.endswith(('.c', '.h')):
                    yield os.path.join(dirname, basename)


def iter_global_strings():
    """Yield one record per _Py_ID()/_Py_DECLARE_STR() use in the sources.

    Each record is ``(name, string, filename, lno, line)``.  For a
    ``_Py_ID(name)`` use, *string* is None; for
    ``_Py_DECLARE_STR(name, "literal")`` it is the literal exactly as it is
    spelled in the C source.
    """
    id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
    str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
    for filename in iter_files():
        try:
            infile = open(filename, encoding='utf-8')
        except FileNotFoundError:
            # The file must have been a temporary file.
            continue
        with infile:
            for lno, line in enumerate(infile, 1):
                for m in id_regex.finditer(line):
                    identifier, = m.groups()
                    yield identifier, None, filename, lno, line
                for m in str_regex.finditer(line):
                    varname, string = m.groups()
                    yield varname, string, filename, lno, line


def iter_to_marker(lines, marker):
    """Yield items of *lines* until one equals *marker* (ignoring trailing
    whitespace).

    The marker line itself is consumed but not yielded, so when *lines* is
    an iterator the caller can keep reading what follows the marker.
    """
    for line in lines:
        if line.rstrip() == marker:
            break
        yield line


class Printer:
    """Write indented lines to a file, optionally as C macro continuations."""

    def __init__(self, file):
        self.level = 0
        self.file = file
        # Stack of "are we inside a multi-line #define" flags; the innermost
        # block is last.
        self.continuation = [False]

    @contextlib.contextmanager
    def indent(self):
        """Indent by one level for the duration of the ``with`` body."""
        save_level = self.level
        try:
            self.level += 1
            yield
        finally:
            self.level = save_level

    def write(self, arg):
        """Write *arg* as one line at the current indentation level.

        Inside a continuation block the line ends with a backslash so that
        the C preprocessor joins it with the following line.
        """
        eol = '\n'
        if self.continuation[-1]:
            eol = f' \\{eol}' if arg else f'\\{eol}'
        # NOTE(review): one space per level is what the reviewed text shows;
        # confirm against the generated headers in case whitespace runs were
        # collapsed in transit.
        self.file.writelines((" "*self.level, arg, eol))

    @contextlib.contextmanager
    def block(self, prefix, suffix="", *, continuation=None):
        """Write ``prefix {``, run the indented body, then ``}suffix``.

        *continuation* selects whether the opening line and the body lines
        get a trailing backslash; by default the enclosing block's setting is
        inherited.  The closing brace is written with the enclosing block's
        setting.
        """
        if continuation is None:
            continuation = self.continuation[-1]
        self.continuation.append(continuation)
        try:
            self.write(prefix + " {")
            with self.indent():
                yield
        finally:
            # Always unwind, so a failing body does not leave a stale flag
            # on the stack.
            self.continuation.pop()
        self.write("}" + suffix)


@contextlib.contextmanager
def open_for_changes(filename, orig):
    """Like open() but only write to the file if it changed.

    Yields an in-memory text buffer.  On normal exit its content is written
    to *filename* (as UTF-8) unless it equals *orig*, in which case a
    "not changed" note is printed instead.  If the body raises, nothing is
    written.
    """
    outfile = io.StringIO()
    yield outfile
    text = outfile.getvalue()
    if text != orig:
        with open(filename, 'w', encoding='utf-8') as outfile:
            outfile.write(text)
    else:
        print(f'# not changed: {filename}')


#######################################
# the global objects

START = f'/* The following is auto-generated by {SCRIPT_NAME}. */'
END = '/* End auto-generated code */'


def _read_template(filename):
    """Return ``(orig, before, after)`` for a partially generated file.

    *orig* is the full current text; *before* is everything ahead of the
    START marker and *after* everything behind the END marker (both without
    the marker lines and without the file's trailing whitespace).
    """
    # Read as UTF-8 to match what open_for_changes() writes.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)
    return orig, before, after


def generate_global_strings(identifiers, strings):
    """Regenerate ``struct _Py_global_strings`` in pycore_global_strings.h.

    *identifiers* is an iterable of identifier names; *strings* maps each
    string literal to its C variable name.
    """
    filename = os.path.join(INTERNAL, 'pycore_global_strings.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_template(filename)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('struct _Py_global_strings', ';'):
            with printer.block('struct', ' literals;'):
                for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                    printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
            outfile.write('\n')
            with printer.block('struct', ' identifiers;'):
                for name in sorted(identifiers):
                    assert name.isidentifier(), name
                    printer.write(f'STRUCT_FOR_ID({name})')
            with printer.block('struct', ' ascii[128];'):
                printer.write("PyASCIIObject _ascii;")
                printer.write("uint8_t _data[2];")
            with printer.block('struct', ' latin1[128];'):
                printer.write("PyCompactUnicodeObject _latin1;")
                printer.write("uint8_t _data[2];")
        printer.write(END)
        printer.write(after)


def generate_runtime_init(identifiers, strings):
    """Regenerate the ``*_INIT`` macros in pycore_runtime_init_generated.h.

    Returns the list of C expressions that reference every object the
    generated initializers create, in generation order.
    """
    # First get some info from the declarations.
    nsmallposints = None
    nsmallnegints = None
    declarations = os.path.join(INTERNAL, 'pycore_global_objects.h')
    with open(declarations, encoding='utf-8') as infile:
        for line in infile:
            if line.startswith('#define _PY_NSMALLPOSINTS'):
                nsmallposints = int(line.split()[-1])
            elif line.startswith('#define _PY_NSMALLNEGINTS'):
                nsmallnegints = int(line.split()[-1])
                break
        else:
            # _PY_NSMALLNEGINTS was never found.
            raise NotImplementedError
    assert nsmallposints and nsmallnegints

    # Then target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_template(filename)

    # Generate the file.
    immortal_objects = []
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('#define _Py_small_ints_INIT', continuation=True):
            for i in range(-nsmallnegints, nsmallposints):
                printer.write(f'_PyLong_DIGIT_INIT({i}),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]')
        printer.write('')
        with printer.block('#define _Py_bytes_characters_INIT', continuation=True):
            for i in range(256):
                printer.write(f'_PyBytes_CHAR_INIT({i}),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]')
        printer.write('')
        with printer.block('#define _Py_str_literals_INIT', continuation=True):
            for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                printer.write(f'INIT_STR({name}, "{literal}"),')
                immortal_objects.append(f'(PyObject *)&_Py_STR({name})')
        printer.write('')
        with printer.block('#define _Py_str_identifiers_INIT', continuation=True):
            for name in sorted(identifiers):
                assert name.isidentifier(), name
                printer.write(f'INIT_ID({name}),')
                immortal_objects.append(f'(PyObject *)&_Py_ID({name})')
        printer.write('')
        with printer.block('#define _Py_str_ascii_INIT', continuation=True):
            for i in range(128):
                printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]')
        printer.write('')
        with printer.block('#define _Py_str_latin1_INIT', continuation=True):
            for i in range(128, 256):
                # The UTF-8 encoding of the character, as C hex escapes.
                utf8 = ''.join(f'\\x{c:02x}' for c in chr(i).encode('utf-8'))
                printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", "{utf8}"),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]')
        printer.write(END)
        printer.write(after)
    return immortal_objects


def generate_static_strings_initializer(identifiers, strings):
    """Regenerate _PyUnicode_InitStaticStrings() in
    pycore_unicodeobject_generated.h.
    """
    # Target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_unicodeobject_generated.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_template(filename)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)

        def write_intern(ref):
            # Emit the C statements that intern one static string.  The
            # length assert backs up the one-character check done in
            # get_identifiers_and_strings(), which only sees the literal as
            # it is spelled in the C source.
            printer.write(f'string = &{ref};')
            printer.write('_PyUnicode_InternStatic(interp, &string);')
            printer.write('assert(_PyUnicode_CheckConsistency(string, 1));')
            printer.write('assert(PyUnicode_GET_LENGTH(string) != 1);')

        printer.write(before)
        printer.write(START)
        printer.write("static inline void")
        with printer.block("_PyUnicode_InitStaticStrings(PyInterpreterState *interp)"):
            printer.write('PyObject *string;')
            for i in sorted(identifiers):
                # This use of _Py_ID() is not picked up by
                # iter_global_strings(): the generated header lives under
                # Include/, which iter_files() does not walk.
                write_intern(f'_Py_ID({i})')
            for _, name in sorted(strings.items()):
                write_intern(f'_Py_STR({name})')
        printer.write(END)
        printer.write(after)


def generate_global_object_finalizers(generated_immortal_objects):
    """Regenerate _PyStaticObjects_CheckRefcnt() in
    pycore_global_objects_fini_generated.h.

    *generated_immortal_objects* is the list returned by
    generate_runtime_init().
    """
    # Target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_global_objects_fini_generated.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_template(filename)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        printer.write('#ifdef Py_DEBUG')
        printer.write("static inline void")
        with printer.block(
                "_PyStaticObjects_CheckRefcnt(PyInterpreterState *interp)"):
            printer.write('/* generated runtime-global */')
            printer.write('// (see pycore_runtime_init_generated.h)')
            for ref in generated_immortal_objects:
                printer.write(f'_PyStaticObject_CheckRefcnt({ref});')
            printer.write('/* non-generated */')
            for ref in NON_GENERATED_IMMORTAL_OBJECTS:
                printer.write(f'_PyStaticObject_CheckRefcnt({ref});')
        printer.write('#endif // Py_DEBUG')
        printer.write(END)
        printer.write(after)


def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
    """Collect the identifiers and string literals to generate.

    Returns ``(identifiers, strings)`` where *identifiers* is IDENTIFIERS
    plus every scanned ``_Py_ID()`` name not in IGNORED, and *strings* maps
    each ``_Py_DECLARE_STR()`` literal to its variable name.

    Raises ValueError for a one-character latin-1 literal, for one literal
    declared under two different names, and for a string that is used both
    as an identifier and as a declared literal.
    """
    identifiers = set(IDENTIFIERS)
    strings = {}
    # Note that we store strings as they appear in C source, so the checks here
    # can be defeated, e.g.:
    # - "a" and "\0x61" won't be reported as duplicate.
    # - "\n" appears as 2 characters.
    # Probably not worth adding a C string parser.
    for name, string, *_ in iter_global_strings():
        if string is None:
            if name not in IGNORED:
                identifiers.add(name)
            continue
        if len(string) == 1 and ord(string) < 256:
            # Give a nice message for common mistakes.
            # To cover tricky cases (like "\n") we also generate C asserts.
            raise ValueError(
                'do not use &_Py_ID or &_Py_STR for one-character latin-1 '
                + f'strings, use _Py_LATIN1_CHR instead: {string!r}')
        if string not in strings:
            strings[string] = name
        elif name != strings[string]:
            # `strings` is keyed by the literal, so look the earlier name up
            # by `string` (indexing by `name` would raise KeyError instead).
            raise ValueError(
                f'name mismatch for string {string!r} '
                f'({name!r} != {strings[string]!r})')
    overlap = identifiers & set(strings)
    if overlap:
        raise ValueError(
            'do not use both _Py_ID and _Py_DECLARE_STR for the same string: '
            + repr(overlap))
    return identifiers, strings


#######################################
# the script

def main() -> None:
    """Scan the sources and regenerate all four headers."""
    identifiers, strings = get_identifiers_and_strings()

    generate_global_strings(identifiers, strings)
    generated_immortal_objects = generate_runtime_init(identifiers, strings)
    generate_static_strings_initializer(identifiers, strings)
    generate_global_object_finalizers(generated_immortal_objects)


if __name__ == '__main__':
    main()