"""Freeze modules and regen related files (e.g. Python/frozen.c).

See the notes at the top of Python/frozen.c for more info.
"""

from collections import namedtuple
import hashlib
import os
import ntpath
import posixpath
import argparse
from update_file import updating_file_with_tmpfile


ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
ROOT_DIR = os.path.abspath(ROOT_DIR)
FROZEN_ONLY = os.path.join(ROOT_DIR, 'Tools', 'freeze', 'flag.py')

STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib')
# If FROZEN_MODULES_DIR or DEEPFROZEN_MODULES_DIR is changed then the
# .gitattributes and .gitignore files need to be updated.
FROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'frozen_modules')

FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c')
MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in')
PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj')
PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters')
PCBUILD_PYTHONCORE = os.path.join(ROOT_DIR, 'PCbuild', 'pythoncore.vcxproj')


OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath'

# These are modules that get frozen.
# If you're debugging new bytecode instructions,
# you can delete all sections except 'import system'.
# This also speeds up building somewhat.
TESTS_SECTION = 'Test module'
FROZEN = [
    # See parse_frozen_spec() for the format.
    # In cases where the frozenid is duplicated, the first one is re-used.
    ('import system', [
        # These frozen modules are necessary for bootstrapping
        # the import system.
        'importlib._bootstrap : _frozen_importlib',
        'importlib._bootstrap_external : _frozen_importlib_external',
        # This module is important because some Python builds rely
        # on a builtin zip file instead of a filesystem.
        'zipimport',
        ]),
    # (You can delete entries from here down to the end of the list.)
    ('stdlib - startup, without site (python -S)', [
        'abc',
        'codecs',
        # For now we do not freeze the encodings, due to the noise all
        # those extra modules add to the text printed during the build.
        # (See https://github.com/python/cpython/pull/28398#pullrequestreview-756856469.)
        #'<encodings.*>',
        'io',
        ]),
    ('stdlib - startup, with site', [
        '_collections_abc',
        '_sitebuiltins',
        'genericpath',
        'ntpath',
        'posixpath',
        # We must explicitly mark os.path as a frozen module
        # even though it will never be imported.
        f'{OS_PATH} : os.path',
        'os',
        'site',
        'stat',
        ]),
    ('runpy - run module with -m', [
        "importlib.util",
        "importlib.machinery",
        "runpy",
    ]),
    (TESTS_SECTION, [
        '__hello__',
        '__hello__ : __hello_alias__',
        '__hello__ : <__phello_alias__>',
        '__hello__ : __phello_alias__.spam',
        '<__phello__.**.*>',
        f'frozen_only : __hello_only__ = {FROZEN_ONLY}',
        ]),
    # (End of stuff you could delete.)
]
BOOTSTRAP = {
    'importlib._bootstrap',
    'importlib._bootstrap_external',
    'zipimport',
}


#######################################
# platform-specific helpers

# Exactly one of the two display helpers is the native os.path.relpath
# (or ntpath.relpath); the other re-joins the native path components
# with the foreign path module before computing the relative path.
if os.path is posixpath:
    relpath_for_posix_display = os.path.relpath

    def relpath_for_windows_display(path, base):
        """Return *path* relative to *base*, rendered with ntpath."""
        return ntpath.relpath(
            ntpath.join(*path.split(os.path.sep)),
            ntpath.join(*base.split(os.path.sep)),
        )

else:
    relpath_for_windows_display = ntpath.relpath

    def relpath_for_posix_display(path, base):
        """Return *path* relative to *base*, rendered with posixpath."""
        return posixpath.relpath(
            posixpath.join(*path.split(os.path.sep)),
            posixpath.join(*base.split(os.path.sep)),
        )


#######################################
# specs

def parse_frozen_specs():
    """Yield a FrozenModule for every spec listed in FROZEN, in order.

    Specs that share a frozenid share a single FrozenSource: the first
    occurrence creates it and later occurrences re-use it.
    """
    seen = {}
    for section, specs in FROZEN:
        parsed = _parse_specs(specs, section, seen)
        for item in parsed:
            frozenid, pyfile, modname, ispkg, section = item
            try:
                source = seen[frozenid]
            except KeyError:
                source = FrozenSource.from_id(frozenid, pyfile)
                seen[frozenid] = source
            else:
                # A re-used frozenid must not point at a different file.
                assert not pyfile or pyfile == source.pyfile, item
            yield FrozenModule(modname, ispkg, section, source)


def _parse_specs(specs, section, seen):
    """Yield the info tuple of each spec, followed by its submodules'."""
    for spec in specs:
        info, subs = _parse_spec(spec, seen, section)
        yield info
        for info in subs or ():
            yield info


def _parse_spec(spec, knownids=None, section=None):
    """Return (info, submodules) for the given spec.

    "info" is the tuple (frozenid, pyfile, modname, ispkg, section) for
    the module named by the spec.  "submodules" is either None or an
    iterator of info tuples for the submodules matched by a pattern.

    Supported formats:

      frozenid
      frozenid : modname
      frozenid : modname = pyfile

    "frozenid" and "modname" must be valid module names (dot-separated
    identifiers).  If "modname" is not provided then "frozenid" is used.
    If "pyfile" is not provided then the filename of the module
    corresponding to "frozenid" is used.

    Angle brackets around a frozenid (e.g. "<encodings>") indicate
    it is a package.  This also means it must be an actual module
    (i.e. "pyfile" cannot have been provided).  Such values can have
    patterns to expand submodules:

      <encodings.*>    - also freeze all direct submodules
      <encodings.**.*> - also freeze the full submodule tree

    As with "frozenid", angle brackets around "modname" indicate
    it is a package.  However, in this case patterns in "modname" are
    not supported and "frozenid" must not have brackets.  "pyfile" is
    optional here: if "frozenid" is already known it is left as given,
    if "pyfile" is provided it must not be a directory, and otherwise
    the filename of the module corresponding to "frozenid" is used.
    """
    frozenid, _, remainder = spec.partition(':')
    modname, _, pyfile = remainder.partition('=')
    frozenid = frozenid.strip()
    modname = modname.strip()
    pyfile = pyfile.strip()

    submodules = None
    if modname.startswith('<') and modname.endswith('>'):
        # An alias that must be reported as a package.
        assert check_modname(frozenid), spec
        modname = modname[1:-1]
        assert check_modname(modname), spec
        # Guard against knownids=None (the default), like the
        # other branches do.
        if knownids and frozenid in knownids:
            pass
        elif pyfile:
            assert not os.path.isdir(pyfile), spec
        else:
            pyfile = _resolve_module(frozenid, ispkg=False)
        ispkg = True
    elif pyfile:
        # An explicit source file: the frozenid must be new.
        assert check_modname(frozenid), spec
        assert not knownids or frozenid not in knownids, spec
        assert check_modname(modname), spec
        assert not os.path.isdir(pyfile), spec
        ispkg = False
    elif knownids and frozenid in knownids:
        # A plain alias of an already-seen frozenid.
        assert check_modname(frozenid), spec
        assert check_modname(modname), spec
        ispkg = False
    else:
        assert not modname or check_modname(modname), spec
        resolved = iter(resolve_modules(frozenid))
        # The first resolved item is the module itself; any remaining
        # items are submodules matched by a pattern.
        frozenid, pyfile, ispkg = next(resolved)
        if not modname:
            modname = frozenid
        if ispkg:
            pkgid = frozenid
            pkgname = modname
            # Maps each package's __init__.py to the frozenid that
            # already covers it, so the same file is not frozen twice.
            pkgfiles = {pyfile: pkgid}
            def iter_subs():
                for frozenid, pyfile, ispkg in resolved:
                    if pkgname:
                        modname = frozenid.replace(pkgid, pkgname, 1)
                    else:
                        modname = frozenid
                    if pyfile:
                        if pyfile in pkgfiles:
                            frozenid = pkgfiles[pyfile]
                            pyfile = None
                        elif ispkg:
                            pkgfiles[pyfile] = frozenid
                    yield frozenid, pyfile, modname, ispkg, section
            submodules = iter_subs()

    info = (frozenid, pyfile or None, modname, ispkg, section)
    return info, submodules


#######################################
# frozen source files

class FrozenSource(namedtuple('FrozenSource', 'id pyfile frozenfile')):
    """A source file to freeze: its ID, its .py file and its output file."""

    @classmethod
    def from_id(cls, frozenid, pyfile=None):
        """Build a FrozenSource, defaulting pyfile to the stdlib module."""
        if not pyfile:
            pyfile = os.path.join(STDLIB_DIR, *frozenid.split('.')) + '.py'
            #assert os.path.exists(pyfile), (frozenid, pyfile)
        frozenfile = resolve_frozen_file(frozenid, FROZEN_MODULES_DIR)
        return cls(frozenid, pyfile, frozenfile)

    @property
    def frozenid(self):
        return self.id

    @property
    def modname(self):
        """The module name, or None if pyfile is not under STDLIB_DIR."""
        if self.pyfile.startswith(STDLIB_DIR):
            return self.id
        return None

    @property
    def symbol(self):
        # This matches what we do in Programs/_freeze_module.c:
        name = self.frozenid.replace('.', '_')
        return '_Py_M__' + name

    @property
    def ispkg(self):
        """True if pyfile is an __init__.py frozen under the package's ID."""
        if not self.pyfile:
            return False
        elif self.frozenid.endswith('.__init__'):
            return False
        else:
            return os.path.basename(self.pyfile) == '__init__.py'

    @property
    def isbootstrap(self):
        return self.id in BOOTSTRAP


def resolve_frozen_file(frozenid, destdir):
    """Return the filename corresponding to the given frozen ID.

    For stdlib modules the ID will always be the full name
    of the source module.

    "frozenid" may be a str or any object with a "frozenid" attribute;
    anything else raises ValueError.  If "destdir" is falsy then only
    the bare filename is returned.
    """
    if not isinstance(frozenid, str):
        try:
            frozenid = frozenid.frozenid
        except AttributeError:
            raise ValueError(f'unsupported frozenid {frozenid!r}') from None
    # We use a consistent naming convention for all frozen modules.
    frozenfile = f'{frozenid}.h'
    if not destdir:
        return frozenfile
    return os.path.join(destdir, frozenfile)


#######################################
# frozen modules

class FrozenModule(namedtuple('FrozenModule', 'name ispkg section source')):
    """A module name (possibly an alias) bound to a FrozenSource."""

    def __getattr__(self, name):
        # Anything not found on the module is looked up on its source
        # (e.g. pyfile, frozenfile, symbol, isbootstrap).
        return getattr(self.source, name)

    @property
    def modname(self):
        return self.name

    @property
    def orig(self):
        return self.source.modname

    @property
    def isalias(self):
        """True if the name differs from (or there is no) source modname."""
        orig = self.source.modname
        if not orig:
            return True
        return self.name != orig

    def summarize(self):
        """Return a dict describing the module and its frozen file."""
        source = self.source.modname
        if source:
            source = f'<{source}>'
        else:
            source = relpath_for_posix_display(self.pyfile, ROOT_DIR)
        return {
            'module': self.name,
            'ispkg': self.ispkg,
            'source': source,
            'frozen': os.path.basename(self.frozenfile),
            'checksum': _get_checksum(self.frozenfile),
        }


def _iter_sources(modules):
    """Yield each distinct source of the given modules, in first-seen order."""
    seen = set()
    for mod in modules:
        if mod.source not in seen:
            yield mod.source
            seen.add(mod.source)


#######################################
# generic helpers

def _get_checksum(filename):
    """Return the SHA-256 hex digest of the file's contents."""
    with open(filename, "rb") as infile:
        contents = infile.read()
    m = hashlib.sha256()
    m.update(contents)
    return m.hexdigest()


def resolve_modules(modname, pyfile=None):
    """Yield (modname, pyfile, ispkg) for the module and matched submodules.

    A name wrapped in angle brackets is a package and may end with a
    submodule pattern (".*" or ".**.*").  The first item yielded is
    always the named module itself.

    Raises ValueError if the (unwrapped, pattern-free) name is not
    a valid module name.
    """
    if modname.startswith('<') and modname.endswith('>'):
        if pyfile:
            assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile
        ispkg = True
        modname = modname[1:-1]
        rawname = modname
        # For now, we only expect match patterns at the end of the name.
        _modname, sep, match = modname.rpartition('.')
        if sep:
            if _modname.endswith('.**'):
                modname = _modname[:-3]
                match = f'**.{match}'
            elif match and not match.isidentifier():
                modname = _modname
            else:
                # It's a plain dotted name, so there is no pattern and
                # the last component must not be treated as one.
                match = None
        else:
            match = None
    else:
        ispkg = False
        rawname = modname
        match = None

    if not check_modname(modname):
        raise ValueError(f'not a valid module name ({rawname})')

    if not pyfile:
        pyfile = _resolve_module(modname, ispkg=ispkg)
    elif os.path.isdir(pyfile):
        pyfile = _resolve_module(modname, pyfile, ispkg)
    yield modname, pyfile, ispkg

    if match:
        pkgdir = os.path.dirname(pyfile)
        yield from iter_submodules(modname, pkgdir, match)


def check_modname(modname):
    """Return True if every dot-separated part is an identifier."""
    return all(n.isidentifier() for n in modname.split('.'))


def iter_submodules(pkgname, pkgdir=None, match='*'):
    """Yield (modname, pyfile, ispkg) for the submodules found in pkgdir.

    "pkgdir" defaults to the package's directory under STDLIB_DIR.
    Directory entries are visited in sorted-name order.  Directories
    without an __init__.py are skipped.
    """
    if not pkgdir:
        pkgdir = os.path.join(STDLIB_DIR, *pkgname.split('.'))
    if not match:
        match = '**.*'
    match_modname = _resolve_modname_matcher(match, pkgdir)

    def _iter_submodules(pkgname, pkgdir):
        for entry in sorted(os.scandir(pkgdir), key=lambda e: e.name):
            matched, recursive = match_modname(entry.name)
            if not matched:
                continue
            modname = f'{pkgname}.{entry.name}'
            if modname.endswith('.py'):
                yield modname[:-3], entry.path, False
            elif entry.is_dir():
                pyfile = os.path.join(entry.path, '__init__.py')
                # We ignore namespace packages.
                if os.path.exists(pyfile):
                    yield modname, pyfile, True
                    if recursive:
                        yield from _iter_submodules(modname, entry.path)

    return _iter_submodules(pkgname, pkgdir)


def _resolve_modname_matcher(match, rootdir=None):
    """Return a function mapping an entry name to (matched, recursive).

    "match" is either a pattern string ("*" or "**.*"; anything else
    raises NotImplementedError) or a callable that is called with
    "rootdir" and returns the matcher.
    """
    if isinstance(match, str):
        if match.startswith('**.'):
            recursive = True
            pat = match[3:]
            # NOTE(review): always true at this point; "assert pat" may
            # have been intended.
            assert match
        else:
            recursive = False
            pat = match

        if pat == '*':
            def match_modname(modname):
                return True, recursive
        else:
            raise NotImplementedError(match)
    elif callable(match):
        match_modname = match(rootdir)
    else:
        raise ValueError(f'unsupported matcher {match!r}')
    return match_modname


def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False):
    """Return the source filename for modname under pathentry.

    Packages resolve to their __init__.py.  "pathentry" must be
    an absolute path.
    """
    assert pathentry, pathentry
    pathentry = os.path.normpath(pathentry)
    assert os.path.isabs(pathentry)
    if ispkg:
        return os.path.join(pathentry, *modname.split('.'), '__init__.py')
    return os.path.join(pathentry, *modname.split('.')) + '.py'


#######################################
# regenerating dependent files

def find_marker(lines, marker, file):
    """Return the index of the first line containing marker.

    "file" is only used in the error message.
    """
    for pos, line in enumerate(lines):
        if marker in line:
            return pos
    raise Exception(f"Can't find {marker!r} in file {file}")


def replace_block(lines, start_marker, end_marker, replacements, file):
    """Return lines with the text between the two markers replaced.

    The marker lines themselves are kept.  Each replacement line is
    right-stripped and terminated with a newline.
    """
    start_pos = find_marker(lines, start_marker, file)
    end_pos = find_marker(lines, end_marker, file)
    if end_pos <= start_pos:
        raise Exception(f"End marker {end_marker!r} "
                        f"occurs before start marker {start_marker!r} "
                        f"in file {file}")
    replacements = [line.rstrip() + '\n' for line in replacements]
    return lines[:start_pos + 1] + replacements + lines[end_pos:]


class UniqueList(list):
    """A list whose append() silently ignores already-appended items."""

    def __init__(self):
        super().__init__()
        self._seen = set()

    def append(self, item):
        if item in self._seen:
            return
        super().append(item)
        self._seen.add(item)


def regen_frozen(modules):
    """Rewrite the generated blocks of FROZEN_FILE for the given modules."""
    headerlines = []
    parentdir = os.path.dirname(FROZEN_FILE)
    for src in _iter_sources(modules):
        # Adding a comment to separate sections here doesn't add much,
        # so we don't.
        header = relpath_for_posix_display(src.frozenfile, parentdir)
        headerlines.append(f'#include "{header}"')

    externlines = UniqueList()
    bootstraplines = []
    stdliblines = []
    testlines = []
    aliaslines = []
    # NOTE(review): confirm the indent width against the entries
    # already in frozen.c.
    indent = ' '
    lastsection = None
    for mod in modules:
        if mod.isbootstrap:
            lines = bootstraplines
        elif mod.section == TESTS_SECTION:
            lines = testlines
        else:
            lines = stdliblines
            # Only the stdlib array gets per-section comments.
            if mod.section != lastsection:
                if lastsection is not None:
                    lines.append('')
                lines.append(f'/* {mod.section} */')
            lastsection = mod.section

        pkg = 'true' if mod.ispkg else 'false'
        size = f"(int)sizeof({mod.symbol})"
        line = f'{{"{mod.name}", {mod.symbol}, {size}, {pkg}}},'
        lines.append(line)

        if mod.isalias:
            if not mod.orig:
                entry = '{"%s", NULL},' % (mod.name,)
            elif mod.source.ispkg:
                # The "<" prefix on the original name marks a package.
                entry = '{"%s", "<%s"},' % (mod.name, mod.orig)
            else:
                entry = '{"%s", "%s"},' % (mod.name, mod.orig)
            aliaslines.append(indent + entry)

    for lines in (bootstraplines, stdliblines, testlines):
        # TODO: Is this necessary any more?
        if lines and not lines[0]:
            del lines[0]
        for i, line in enumerate(lines):
            if line:
                lines[i] = indent + line

    print(f'# Updating {os.path.relpath(FROZEN_FILE)}')
    with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile):
        lines = infile.readlines()
        # TODO: Use more obvious markers, e.g.
        # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$
        lines = replace_block(
            lines,
            "/* Includes for frozen modules: */",
            "/* End includes */",
            headerlines,
            FROZEN_FILE,
        )
        lines = replace_block(
            lines,
            "static const struct _frozen bootstrap_modules[] =",
            "/* bootstrap sentinel */",
            bootstraplines,
            FROZEN_FILE,
        )
        lines = replace_block(
            lines,
            "static const struct _frozen stdlib_modules[] =",
            "/* stdlib sentinel */",
            stdliblines,
            FROZEN_FILE,
        )
        lines = replace_block(
            lines,
            "static const struct _frozen test_modules[] =",
            "/* test sentinel */",
            testlines,
            FROZEN_FILE,
        )
        lines = replace_block(
            lines,
            "const struct _module_alias aliases[] =",
            "/* aliases sentinel */",
            aliaslines,
            FROZEN_FILE,
        )
        outfile.writelines(lines)


def regen_makefile(modules):
    """Rewrite the generated blocks of MAKEFILE for the given modules."""
    pyfiles = []
    frozenfiles = []
    rules = ['']
    for src in _iter_sources(modules):
        frozen_header = relpath_for_posix_display(src.frozenfile, ROOT_DIR)
        frozenfiles.append(f'\t\t{frozen_header} \\')

        pyfile = relpath_for_posix_display(src.pyfile, ROOT_DIR)
        pyfiles.append(f'\t\t{pyfile} \\')

        if src.isbootstrap:
            freezecmd = '$(FREEZE_MODULE_BOOTSTRAP)'
            freezedep = '$(FREEZE_MODULE_BOOTSTRAP_DEPS)'
        else:
            freezecmd = '$(FREEZE_MODULE)'
            freezedep = '$(FREEZE_MODULE_DEPS)'

        freeze = (f'{freezecmd} {src.frozenid} '
                  f'$(srcdir)/{pyfile} {frozen_header}')
        rules.extend([
            f'{frozen_header}: {pyfile} {freezedep}',
            f'\t{freeze}',
            '',
        ])
    # The last entry of each list must not end with a line continuation.
    # Both lists grow together, so one emptiness check covers both.
    if pyfiles:
        pyfiles[-1] = pyfiles[-1].rstrip(" \\")
        frozenfiles[-1] = frozenfiles[-1].rstrip(" \\")

    print(f'# Updating {os.path.relpath(MAKEFILE)}')
    with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile):
        lines = infile.readlines()
        lines = replace_block(
            lines,
            "FROZEN_FILES_IN =",
            "# End FROZEN_FILES_IN",
            pyfiles,
            MAKEFILE,
        )
        lines = replace_block(
            lines,
            "FROZEN_FILES_OUT =",
            "# End FROZEN_FILES_OUT",
            frozenfiles,
            MAKEFILE,
        )
        lines = replace_block(
            lines,
            "# BEGIN: freezing modules",
            "# END: freezing modules",
            rules,
            MAKEFILE,
        )
        outfile.writelines(lines)


def regen_pcbuild(modules):
    """Rewrite the generated blocks of the PCbuild project and filters files."""
    projlines = []
    filterlines = []
    # NOTE(review): confirm the leading whitespace of these XML lines
    # against the existing .vcxproj/.filters entries.
    for src in _iter_sources(modules):
        pyfile = relpath_for_windows_display(src.pyfile, ROOT_DIR)
        header = relpath_for_windows_display(src.frozenfile, ROOT_DIR)
        intfile = ntpath.splitext(ntpath.basename(header))[0] + '.g.h'
        projlines.append(f' <None Include="..\\{pyfile}">')
        projlines.append(f' <ModName>{src.frozenid}</ModName>')
        projlines.append(f' <IntFile>$(IntDir){intfile}</IntFile>')
        projlines.append(f' <OutFile>$(GeneratedFrozenModulesDir){header}</OutFile>')
        projlines.append(f' </None>')

        filterlines.append(f' <None Include="..\\{pyfile}">')
        filterlines.append(' <Filter>Python Files</Filter>')
        filterlines.append(' </None>')

    print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}')
    with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
        lines = infile.readlines()
        lines = replace_block(
            lines,
            '<!-- BEGIN frozen modules -->',
            '<!-- END frozen modules -->',
            projlines,
            PCBUILD_PROJECT,
        )
        outfile.writelines(lines)
    print(f'# Updating {os.path.relpath(PCBUILD_FILTERS)}')
    with updating_file_with_tmpfile(PCBUILD_FILTERS) as (infile, outfile):
        lines = infile.readlines()
        lines = replace_block(
            lines,
            '<!-- BEGIN frozen modules -->',
            '<!-- END frozen modules -->',
            filterlines,
            PCBUILD_FILTERS,
        )
        outfile.writelines(lines)


#######################################
# the script

def main():
    """Parse the FROZEN specs and regenerate every dependent file."""
    # Expand the raw specs, preserving order.
    modules = list(parse_frozen_specs())

    # Regen build-related files.
    regen_makefile(modules)
    regen_pcbuild(modules)
    regen_frozen(modules)


if __name__ == '__main__':
    main()