• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Freeze modules and regen related files (e.g. Python/frozen.c).
2
3See the notes at the top of Python/frozen.c for more info.
4"""
5
6from collections import namedtuple
7import hashlib
8import os
9import ntpath
10import posixpath
11import argparse
12from update_file import updating_file_with_tmpfile
13
14
# The repository root is three directory levels above this file.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
ROOT_DIR = os.path.abspath(ROOT_DIR)
# The source file used for the 'frozen_only' spec in FROZEN.
FROZEN_ONLY = os.path.join(ROOT_DIR, 'Tools', 'freeze', 'flag.py')

STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib')
# If FROZEN_MODULES_DIR or DEEPFROZEN_MODULES_DIR is changed then the
# .gitattributes and .gitignore files need to be updated.
# NOTE(review): DEEPFROZEN_MODULES_DIR is not defined in the code visible
# here -- confirm whether that part of the comment is still relevant.
FROZEN_MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'frozen_modules')

# The files that get regenerated (see the regen_*() functions).
FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c')
MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in')
PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj')
PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters')
PCBUILD_PYTHONCORE = os.path.join(ROOT_DIR, 'PCbuild', 'pythoncore.vcxproj')


# The name of the path module matching the platform running this script.
OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath'
32
# These are modules that get frozen.
# If you're debugging new bytecode instructions,
# you can delete all sections except 'import system'.
# This also speeds up building somewhat.
TESTS_SECTION = 'Test module'
# Each entry is a (section name, list of specs) pair.
FROZEN = [
    # See _parse_spec() for the format.
    # In cases where the frozenid is duplicated, the first one is re-used.
    ('import system', [
        # These frozen modules are necessary for bootstrapping
        # the import system.
        'importlib._bootstrap : _frozen_importlib',
        'importlib._bootstrap_external : _frozen_importlib_external',
        # This module is important because some Python builds rely
        # on a builtin zip file instead of a filesystem.
        'zipimport',
        ]),
    # (You can delete entries from here down to the end of the list.)
    ('stdlib - startup, without site (python -S)', [
        'abc',
        'codecs',
        # For now we do not freeze the encodings, due to the noise all
        # those extra modules add to the text printed during the build.
        # (See https://github.com/python/cpython/pull/28398#pullrequestreview-756856469.)
        #'<encodings.*>',
        'io',
        ]),
    ('stdlib - startup, with site', [
        '_collections_abc',
        '_sitebuiltins',
        'genericpath',
        'ntpath',
        'posixpath',
        # We must explicitly mark os.path as a frozen module
        # even though it will never be imported.
        f'{OS_PATH} : os.path',
        'os',
        'site',
        'stat',
        ]),
    ('runpy - run module with -m', [
        "importlib.util",
        "importlib.machinery",
        "runpy",
    ]),
    (TESTS_SECTION, [
        '__hello__',
        '__hello__ : __hello_alias__',
        '__hello__ : <__phello_alias__>',
        '__hello__ : __phello_alias__.spam',
        '<__phello__.**.*>',
        f'frozen_only : __hello_only__ = {FROZEN_ONLY}',
        ]),
    # (End of stuff you could delete.)
]
# The frozen IDs that FrozenSource.isbootstrap reports as bootstrap modules.
BOOTSTRAP = {
    'importlib._bootstrap',
    'importlib._bootstrap_external',
    'zipimport',
}
93
94
95#######################################
96# platform-specific helpers
97
if os.path is not posixpath:
    # The native path module is not posixpath, so its separators are
    # translated to POSIX form by hand; ntpath.relpath is used as-is
    # for the Windows flavor.
    relpath_for_windows_display = ntpath.relpath

    def relpath_for_posix_display(path, base):
        """Return "path" relative to "base", spelled with POSIX separators."""
        posix_path = posixpath.join(*path.split(os.path.sep))
        posix_base = posixpath.join(*base.split(os.path.sep))
        return posixpath.relpath(posix_path, posix_base)

else:
    # The native path module is posixpath, so only the Windows flavor
    # needs the separators translated.
    relpath_for_posix_display = os.path.relpath

    def relpath_for_windows_display(path, base):
        """Return "path" relative to "base", spelled with Windows separators."""
        nt_path = ntpath.join(*path.split(os.path.sep))
        nt_base = ntpath.join(*base.split(os.path.sep))
        return ntpath.relpath(nt_path, nt_base)
115
116
117#######################################
118# specs
119
def parse_frozen_specs():
    """Yield a FrozenModule for each module described by FROZEN.

    A FrozenSource is created the first time a frozen ID shows up and
    is re-used by every later module that has the same frozen ID.
    """
    sources = {}
    for section, specs in FROZEN:
        # The mapping of known sources doubles as the "known IDs"
        # collection consulted while the specs are parsed.
        for info in _parse_specs(specs, section, sources):
            frozenid, pyfile, modname, ispkg, modsection = info
            if frozenid in sources:
                source = sources[frozenid]
                # A repeated ID must not point at a different file.
                assert not pyfile or pyfile == source.pyfile, info
            else:
                source = FrozenSource.from_id(frozenid, pyfile)
                sources[frozenid] = source
            yield FrozenModule(modname, ispkg, modsection, source)
134
135
def _parse_specs(specs, section, seen):
    """Yield the info tuple of each spec, followed by those of its submodules."""
    for spec in specs:
        info, submodules = _parse_spec(spec, seen, section)
        yield info
        # _parse_spec() gives None when there are no submodules to expand.
        if submodules:
            yield from submodules
142
143
def _parse_spec(spec, knownids=None, section=None):
    """Parse a single frozen-module spec.

    Return the pair (info, submodules).  "info" is the tuple
    (frozenid, pyfile, modname, ispkg, section) for the module named
    by the spec.  "submodules" is None, or an iterator of info tuples
    for the submodules matched by a pattern in the spec.

    Supported formats:

      frozenid
      frozenid : modname
      frozenid : modname = pyfile

    "frozenid" and "modname" must be valid module names (dot-separated
    identifiers).  If "modname" is not provided then "frozenid" is used.
    If "pyfile" is not provided then the filename of the module
    corresponding to "frozenid" is used.

    Angle brackets around a frozenid (e.g. "<encodings>") indicate
    it is a package.  This also means it must be an actual module
    (i.e. "pyfile" cannot have been provided).  Such values can have
    patterns to expand submodules:

      <encodings.*>    - also freeze all direct submodules
      <encodings.**.*> - also freeze the full submodule tree

    As with "frozenid", angle brackets around "modname" indicate
    it is a package.  In that case "frozenid" must be a plain module
    name (no brackets) and patterns in "modname" are not supported.
    "pyfile" is optional there; if given, it must not be a directory.

    "knownids" is an optional container of the frozen IDs that were
    already handled; None is treated the same as an empty container.
    """
    frozenid, _, remainder = spec.partition(':')
    modname, _, pyfile = remainder.partition('=')
    frozenid = frozenid.strip()
    modname = modname.strip()
    pyfile = pyfile.strip()

    # Guard against knownids=None (the default) in every branch.
    isknown = bool(knownids) and frozenid in knownids

    submodules = None
    if modname.startswith('<') and modname.endswith('>'):
        # An alias that is a package.
        assert check_modname(frozenid), spec
        modname = modname[1:-1]
        assert check_modname(modname), spec
        if isknown:
            pass
        elif pyfile:
            assert not os.path.isdir(pyfile), spec
        else:
            pyfile = _resolve_module(frozenid, ispkg=False)
        ispkg = True
    elif pyfile:
        # An explicit file is only allowed for an ID not seen before.
        assert check_modname(frozenid), spec
        assert not isknown, spec
        assert check_modname(modname), spec
        assert not os.path.isdir(pyfile), spec
        ispkg = False
    elif isknown:
        # A plain alias of something that was already handled.
        assert check_modname(frozenid), spec
        assert check_modname(modname), spec
        ispkg = False
    else:
        assert not modname or check_modname(modname), spec
        # The first resolved item is the module itself; any others
        # are the submodules matched by a pattern in the frozen ID.
        resolved = iter(resolve_modules(frozenid))
        frozenid, pyfile, ispkg = next(resolved)
        if not modname:
            modname = frozenid
        if ispkg:
            pkgid = frozenid
            pkgname = modname
            # Maps each package's file to the frozen ID that owns it.
            pkgfiles = {pyfile: pkgid}
            def iter_subs():
                for frozenid, pyfile, ispkg in resolved:
                    if pkgname:
                        modname = frozenid.replace(pkgid, pkgname, 1)
                    else:
                        modname = frozenid
                    if pyfile:
                        if pyfile in pkgfiles:
                            # The file was already claimed by a package
                            # so re-use that package's frozen ID.
                            frozenid = pkgfiles[pyfile]
                            pyfile = None
                        elif ispkg:
                            pkgfiles[pyfile] = frozenid
                    yield frozenid, pyfile, modname, ispkg, section
            submodules = iter_subs()

    info = (frozenid, pyfile or None, modname, ispkg, section)
    return info, submodules
229
230
231#######################################
232# frozen source files
233
class FrozenSource(namedtuple('FrozenSource', 'id pyfile frozenfile')):
    """The source of a frozen module: its frozen ID, .py file and frozen file."""

    @classmethod
    def from_id(cls, frozenid, pyfile=None):
        """Return the source for "frozenid".

        Without "pyfile", the path is derived from the ID, relative
        to STDLIB_DIR.  (No check is made that the file exists.)
        """
        if not pyfile:
            parts = frozenid.split('.')
            pyfile = os.path.join(STDLIB_DIR, *parts) + '.py'
        return cls(
            frozenid,
            pyfile,
            resolve_frozen_file(frozenid, FROZEN_MODULES_DIR),
        )

    @property
    def frozenid(self):
        """An alias for the "id" field."""
        return self.id

    @property
    def modname(self):
        """The frozen ID if the file path starts with STDLIB_DIR, else None."""
        return self.id if self.pyfile.startswith(STDLIB_DIR) else None

    @property
    def symbol(self):
        """The C symbol name derived from the frozen ID."""
        # This matches what we do in Programs/_freeze_module.c:
        return '_Py_M__' + self.frozenid.replace('.', '_')

    @property
    def ispkg(self):
        """True if the file is an __init__.py not frozen as "<pkg>.__init__"."""
        if not self.pyfile or self.frozenid.endswith('.__init__'):
            return False
        return os.path.basename(self.pyfile) == '__init__.py'

    @property
    def isbootstrap(self):
        """True if the frozen ID is listed in BOOTSTRAP."""
        return self.id in BOOTSTRAP
272
273
def resolve_frozen_file(frozenid, destdir):
    """Return the name of the frozen file for the given frozen ID.

    "frozenid" is a string or an object with a "frozenid" attribute;
    anything else results in ValueError.  The result is joined onto
    "destdir" unless that is empty or None.

    For stdlib modules the ID will always be the full name
    of the source module.
    """
    if isinstance(frozenid, str):
        name = frozenid
    else:
        try:
            name = frozenid.frozenid
        except AttributeError:
            raise ValueError(f'unsupported frozenid {frozenid!r}')
    # We use a consistent naming convention for all frozen modules.
    header = f'{name}.h'
    return os.path.join(destdir, header) if destdir else header
290
291
292#######################################
293# frozen modules
294
class FrozenModule(namedtuple('FrozenModule', 'name ispkg section source')):
    """A module to freeze, i.e. a module name paired with its source."""

    def __getattr__(self, name):
        # Anything not found on the module is looked up on its source.
        return getattr(self.source, name)

    @property
    def modname(self):
        """An alias for the "name" field."""
        return self.name

    @property
    def orig(self):
        """The module name reported by the source."""
        return self.source.modname

    @property
    def isalias(self):
        """True if the source has no module name or it differs from "name"."""
        orig = self.source.modname
        return not orig or self.name != orig

    def summarize(self):
        """Return a dict describing the module, its source and frozen file."""
        origin = self.source.modname
        if origin:
            shown = f'<{origin}>'
        else:
            shown = relpath_for_posix_display(self.pyfile, ROOT_DIR)
        frozenfile = self.frozenfile
        return {
            'module': self.name,
            'ispkg': self.ispkg,
            'source': shown,
            'frozen': os.path.basename(frozenfile),
            'checksum': _get_checksum(frozenfile),
        }
328
329
330def _iter_sources(modules):
331    seen = set()
332    for mod in modules:
333        if mod.source not in seen:
334            yield mod.source
335            seen.add(mod.source)
336
337
338#######################################
339# generic helpers
340
341def _get_checksum(filename):
342    with open(filename, "rb") as infile:
343        contents = infile.read()
344    m = hashlib.sha256()
345    m.update(contents)
346    return m.hexdigest()
347
348
def resolve_modules(modname, pyfile=None):
    """Yield (modname, pyfile, ispkg) for the module and matched submodules.

    Angle brackets around "modname" mark it as a package.  A bracketed
    name may end with a pattern ("<pkg.*>" or "<pkg.**.*>"), in which
    case the matching submodules are yielded after the package itself.
    A bracketed plain name (e.g. "<pkg.sub>") yields only the package.

    ValueError is raised if the name, minus brackets and pattern,
    is not a valid module name.
    """
    if modname.startswith('<') and modname.endswith('>'):
        if pyfile:
            assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile
        ispkg = True
        modname = modname[1:-1]
        rawname = modname
        # For now, we only expect match patterns at the end of the name.
        match = None
        parent, sep, last = modname.rpartition('.')
        if sep:
            if parent.endswith('.**'):
                modname = parent[:-3]
                match = f'**.{last}'
            elif last and not last.isidentifier():
                modname = parent
                match = last
            # Otherwise it's a plain name so we leave it alone.  The last
            # component must not be used as a pattern, so "match" stays None.
    else:
        ispkg = False
        rawname = modname
        match = None

    if not check_modname(modname):
        raise ValueError(f'not a valid module name ({rawname})')

    if not pyfile:
        pyfile = _resolve_module(modname, ispkg=ispkg)
    elif os.path.isdir(pyfile):
        # A directory was given so treat it as the path entry to search.
        pyfile = _resolve_module(modname, pyfile, ispkg)
    yield modname, pyfile, ispkg

    if match:
        pkgdir = os.path.dirname(pyfile)
        yield from iter_submodules(modname, pkgdir, match)
384
385
def check_modname(modname):
    """Return True if every dot-separated part of "modname" is an identifier."""
    for part in modname.split('.'):
        if not part.isidentifier():
            return False
    return True
388
389
def iter_submodules(pkgname, pkgdir=None, match='*'):
    """Return an iterator of (modname, pyfile, ispkg) for a package's submodules.

    "pkgdir" defaults to the package's directory under STDLIB_DIR.
    An empty "match" is treated as '**.*'.  Directory entries are
    visited in sorted name order.
    """
    pkgdir = pkgdir or os.path.join(STDLIB_DIR, *pkgname.split('.'))
    match_modname = _resolve_modname_matcher(match or '**.*', pkgdir)

    def walk(parent, dirname):
        entries = sorted(os.scandir(dirname), key=lambda e: e.name)
        for entry in entries:
            matched, recursive = match_modname(entry.name)
            if not matched:
                continue
            fullname = f'{parent}.{entry.name}'
            if fullname.endswith('.py'):
                yield fullname[:-3], entry.path, False
                continue
            if not entry.is_dir():
                continue
            initfile = os.path.join(entry.path, '__init__.py')
            # We ignore namespace packages.
            if not os.path.exists(initfile):
                continue
            yield fullname, initfile, True
            if recursive:
                yield from walk(fullname, entry.path)

    return walk(pkgname, pkgdir)
414
415
416def _resolve_modname_matcher(match, rootdir=None):
417    if isinstance(match, str):
418        if match.startswith('**.'):
419            recursive = True
420            pat = match[3:]
421            assert match
422        else:
423            recursive = False
424            pat = match
425
426        if pat == '*':
427            def match_modname(modname):
428                return True, recursive
429        else:
430            raise NotImplementedError(match)
431    elif callable(match):
432        match_modname = match(rootdir)
433    else:
434        raise ValueError(f'unsupported matcher {match!r}')
435    return match_modname
436
437
def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False):
    """Return the filename for "modname" under the given path entry.

    "pathentry" must be a non-empty absolute path.  For a package
    the result is the package's __init__.py.
    """
    assert pathentry, pathentry
    root = os.path.normpath(pathentry)
    assert os.path.isabs(root)
    parts = modname.split('.')
    if ispkg:
        return os.path.join(root, *parts, '__init__.py')
    return os.path.join(root, *parts) + '.py'
445
446
447#######################################
448# regenerating dependent files
449
def find_marker(lines, marker, file):
    """Return the index of the first line containing "marker".

    "file" is only used in the message of the exception raised
    when no line contains the marker.
    """
    pos = next((i for i, line in enumerate(lines) if marker in line), None)
    if pos is None:
        raise Exception(f"Can't find {marker!r} in file {file}")
    return pos
455
456
def replace_block(lines, start_marker, end_marker, replacements, file):
    """Return "lines" with the text between the two markers replaced.

    The marker lines themselves are kept.  Each replacement line has
    its trailing whitespace stripped and a newline appended.  An
    exception is raised if the end marker is not found after the
    start marker.
    """
    start_pos = find_marker(lines, start_marker, file)
    end_pos = find_marker(lines, end_marker, file)
    if start_pos >= end_pos:
        raise Exception(f"End marker {end_marker!r} "
                        f"occurs before start marker {start_marker!r} "
                        f"in file {file}")
    head = lines[:start_pos + 1]
    tail = lines[end_pos:]
    body = [f'{line.rstrip()}\n' for line in replacements]
    return head + body + tail
466
467
class UniqueList(list):
    """A list that silently ignores items that were already added.

    Uniqueness is tracked with a set, so the items must be hashable.
    Only the constructor, append() and extend() enforce uniqueness;
    other mutating list methods are not overridden.
    """

    def __init__(self, iterable=()):
        super().__init__()
        self._seen = set()
        # Go through append() so the initial items are de-duplicated too.
        for item in iterable:
            self.append(item)

    def append(self, item):
        """Add "item" to the end of the list unless it was already added."""
        if item in self._seen:
            return
        super().append(item)
        self._seen.add(item)

    def extend(self, iterable):
        """Append each item that was not already added."""
        for item in iterable:
            self.append(item)
477
478
def regen_frozen(modules):
    """Rewrite the generated blocks of FROZEN_FILE for the given modules.

    The blocks are: the #include lines for the frozen files, the
    bootstrap/stdlib/test module arrays, and the aliases array.
    """
    headerlines = []
    parentdir = os.path.dirname(FROZEN_FILE)
    for src in _iter_sources(modules):
        # Adding a comment to separate sections here doesn't add much,
        # so we don't.
        header = relpath_for_posix_display(src.frozenfile, parentdir)
        headerlines.append(f'#include "{header}"')

    bootstraplines = []
    stdliblines = []
    testlines = []
    aliaslines = []
    indent = '    '
    lastsection = None
    for mod in modules:
        if mod.isbootstrap:
            lines = bootstraplines
        elif mod.section == TESTS_SECTION:
            lines = testlines
        else:
            lines = stdliblines
            # Start each new stdlib section with a comment,
            # separated from the previous section by a blank line.
            if mod.section != lastsection:
                if lastsection is not None:
                    lines.append('')
                lines.append(f'/* {mod.section} */')
            lastsection = mod.section

        pkg = 'true' if mod.ispkg else 'false'
        size = f"(int)sizeof({mod.symbol})"
        line = f'{{"{mod.name}", {mod.symbol}, {size}, {pkg}}},'
        lines.append(line)

        if mod.isalias:
            if not mod.orig:
                entry = '{"%s", NULL},' % (mod.name,)
            elif mod.source.ispkg:
                # The original name gets a "<" prefix when the source
                # is a package.
                entry = '{"%s", "<%s"},' % (mod.name, mod.orig)
            else:
                entry = '{"%s", "%s"},' % (mod.name, mod.orig)
            aliaslines.append(indent + entry)

    for lines in (bootstraplines, stdliblines, testlines):
        # TODO: Is this necessary any more?
        if lines and not lines[0]:
            del lines[0]
        # Blank separator lines are left unindented.
        for i, line in enumerate(lines):
            if line:
                lines[i] = indent + line

    print(f'# Updating {os.path.relpath(FROZEN_FILE)}')
    with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile):
        lines = infile.readlines()
        # TODO: Use more obvious markers, e.g.
        # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$
        lines = replace_block(
            lines,
            "/* Includes for frozen modules: */",
            "/* End includes */",
            headerlines,
            FROZEN_FILE,
        )
        lines = replace_block(
            lines,
            "static const struct _frozen bootstrap_modules[] =",
            "/* bootstrap sentinel */",
            bootstraplines,
            FROZEN_FILE,
        )
        lines = replace_block(
            lines,
            "static const struct _frozen stdlib_modules[] =",
            "/* stdlib sentinel */",
            stdliblines,
            FROZEN_FILE,
        )
        lines = replace_block(
            lines,
            "static const struct _frozen test_modules[] =",
            "/* test sentinel */",
            testlines,
            FROZEN_FILE,
        )
        lines = replace_block(
            lines,
            "const struct _module_alias aliases[] =",
            "/* aliases sentinel */",
            aliaslines,
            FROZEN_FILE,
        )
        outfile.writelines(lines)
571
572
def regen_makefile(modules):
    """Rewrite the generated blocks of MAKEFILE for the given modules.

    The blocks are: the list of source files (FROZEN_FILES_IN), the
    list of frozen files (FROZEN_FILES_OUT), and the rules that
    produce each frozen file.
    """
    pyfiles = []
    frozenfiles = []
    rules = ['']
    for src in _iter_sources(modules):
        frozen_header = relpath_for_posix_display(src.frozenfile, ROOT_DIR)
        frozenfiles.append(f'\t\t{frozen_header} \\')

        pyfile = relpath_for_posix_display(src.pyfile, ROOT_DIR)
        pyfiles.append(f'\t\t{pyfile} \\')

        if src.isbootstrap:
            freezecmd = '$(FREEZE_MODULE_BOOTSTRAP)'
            freezedep = '$(FREEZE_MODULE_BOOTSTRAP_DEPS)'
        else:
            freezecmd = '$(FREEZE_MODULE)'
            freezedep = '$(FREEZE_MODULE_DEPS)'

        freeze = (f'{freezecmd} {src.frozenid} '
                    f'$(srcdir)/{pyfile} {frozen_header}')
        rules.extend([
            f'{frozen_header}: {pyfile} {freezedep}',
            f'\t{freeze}',
            '',
        ])
    # Drop the trailing " \" from the last entry of each list.
    # The lists are empty if there are no modules.
    if pyfiles:
        pyfiles[-1] = pyfiles[-1].rstrip(" \\")
    if frozenfiles:
        frozenfiles[-1] = frozenfiles[-1].rstrip(" \\")

    print(f'# Updating {os.path.relpath(MAKEFILE)}')
    with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile):
        lines = infile.readlines()
        lines = replace_block(
            lines,
            "FROZEN_FILES_IN =",
            "# End FROZEN_FILES_IN",
            pyfiles,
            MAKEFILE,
        )
        lines = replace_block(
            lines,
            "FROZEN_FILES_OUT =",
            "# End FROZEN_FILES_OUT",
            frozenfiles,
            MAKEFILE,
        )
        lines = replace_block(
            lines,
            "# BEGIN: freezing modules",
            "# END: freezing modules",
            rules,
            MAKEFILE,
        )
        outfile.writelines(lines)
626
627
def regen_pcbuild(modules):
    """Rewrite the frozen-modules blocks of the PCbuild project files.

    Both PCBUILD_PROJECT and PCBUILD_FILTERS get one entry per
    frozen source, using Windows-style relative paths.
    """
    projlines = []
    filterlines = []
    for src in _iter_sources(modules):
        pyfile = relpath_for_windows_display(src.pyfile, ROOT_DIR)
        header = relpath_for_windows_display(src.frozenfile, ROOT_DIR)
        # The intermediate file is named after the frozen file,
        # with the extension replaced by ".g.h".
        intfile = ntpath.splitext(ntpath.basename(header))[0] + '.g.h'
        projlines.append(f'    <None Include="..\\{pyfile}">')
        projlines.append(f'      <ModName>{src.frozenid}</ModName>')
        projlines.append(f'      <IntFile>$(IntDir){intfile}</IntFile>')
        projlines.append(f'      <OutFile>$(GeneratedFrozenModulesDir){header}</OutFile>')
        projlines.append('    </None>')

        filterlines.append(f'    <None Include="..\\{pyfile}">')
        filterlines.append('      <Filter>Python Files</Filter>')
        filterlines.append('    </None>')

    print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}')
    with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
        lines = infile.readlines()
        lines = replace_block(
            lines,
            '<!-- BEGIN frozen modules -->',
            '<!-- END frozen modules -->',
            projlines,
            PCBUILD_PROJECT,
        )
        outfile.writelines(lines)
    print(f'# Updating {os.path.relpath(PCBUILD_FILTERS)}')
    with updating_file_with_tmpfile(PCBUILD_FILTERS) as (infile, outfile):
        lines = infile.readlines()
        lines = replace_block(
            lines,
            '<!-- BEGIN frozen modules -->',
            '<!-- END frozen modules -->',
            filterlines,
            PCBUILD_FILTERS,
        )
        outfile.writelines(lines)
668
669
670#######################################
671# the script
672
def main():
    """Parse the frozen specs and regenerate the files that depend on them."""
    # Expand the raw specs, preserving order.
    modules = list(parse_frozen_specs())

    # Regen build-related files.
    for regen in (regen_makefile, regen_pcbuild, regen_frozen):
        regen(modules)
681
682
683if __name__ == '__main__':
684    main()
685