• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Target-specific code generation, parsing, and processing."""
2
3import asyncio
4import dataclasses
5import hashlib
6import json
7import os
8import pathlib
9import re
10import sys
11import tempfile
12import typing
13
14import _llvm
15import _schema
16import _stencils
17import _writer
18
# This build script relies on features that only exist in Python 3.11 and
# newer (asyncio.TaskGroup is used further down, for example), so fail early
# with a clear message on anything older:
if sys.version_info[:2] < (3, 11):
    raise RuntimeError("Building the JIT compiler requires Python 3.11 or newer!")

# Every location is derived from the resolved path of this very file:
TOOLS_JIT_BUILD = pathlib.Path(__file__).resolve()
TOOLS_JIT = TOOLS_JIT_BUILD.parent
TOOLS = TOOLS_JIT.parent
CPYTHON = TOOLS.parent
PYTHON_EXECUTOR_CASES_C_H = CPYTHON.joinpath("Python", "executor_cases.c.h")
TOOLS_JIT_TEMPLATE_C = TOOLS_JIT.joinpath("template.c")


# Constrained type variables for the section (_S) and relocation (_R) record
# types of each supported object file format:
_S = typing.TypeVar(
    "_S",
    _schema.COFFSection,
    _schema.ELFSection,
    _schema.MachOSection,
)
_R = typing.TypeVar(
    "_R",
    _schema.COFFRelocation,
    _schema.ELFRelocation,
    _schema.MachORelocation,
)
34
35
@dataclasses.dataclass
class _Target(typing.Generic[_S, _R]):
    """Shared stencil-building logic for a single target triple.

    Each opcode's C code is compiled with clang, the resulting object file is
    parsed with llvm-readobj (plus llvm-objdump, when it produces output) into
    a stencil group, and all of the groups are written out as jit_stencils.h.

    Subclasses supply the object-file-format-specific pieces by overriding
    _handle_section and _handle_relocation.
    """

    # The target triple, passed to clang as --target and mixed into the digest:
    triple: str
    _: dataclasses.KW_ONLY
    # Forwarded to StencilGroup.process_relocations:
    alignment: int = 1
    # Extra arguments appended to every clang command line:
    args: typing.Sequence[str] = ()
    # Whether to rewrite the IR so that "_JIT_" functions use the ghccc
    # calling convention (this also controls whether a trampoline is built):
    ghccc: bool = False
    # Prefix stripped from the symbol names found in object files:
    prefix: str = ""
    # When false, build() prints an "experimental" warning:
    stable: bool = False
    # Selects -D_DEBUG over -DNDEBUG, and is mixed into the digest:
    debug: bool = False
    # Forwarded as echo= to the _llvm helpers:
    verbose: bool = False

    def _compute_digest(self, out: pathlib.Path) -> str:
        """Return a SHA-256 hex digest of the inputs the stencils depend on.

        The digest covers the triple, the debug flag, the generated executor
        cases, out / "pyconfig.h", and every file found under TOOLS_JIT.
        """
        hasher = hashlib.sha256()
        hasher.update(self.triple.encode())
        hasher.update(self.debug.to_bytes())
        # These dependencies are also reflected in _JITSources in regen.targets:
        hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes())
        hasher.update((out / "pyconfig.h").read_bytes())
        for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)):
            # os.walk reports the names inside a directory in arbitrary order,
            # so sort them to keep the digest stable for unchanged inputs:
            for filename in sorted(filenames):
                hasher.update(pathlib.Path(dirpath, filename).read_bytes())
        return hasher.hexdigest()

    async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup:
        """Parse the object file at the given path into a StencilGroup."""
        group = _stencils.StencilGroup()
        # The disassembly is optional: maybe_run may return None, in which case
        # the group's code disassembly is simply left empty.
        args = ["--disassemble", "--reloc", f"{path}"]
        output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose)
        if output is not None:
            group.code.disassembly.extend(
                line.expandtabs().strip()
                for line in output.splitlines()
                if not line.isspace()
            )
        args = [
            "--elf-output-style=JSON",
            "--expand-relocs",
            # "--pretty-print",
            "--section-data",
            "--section-relocations",
            "--section-symbols",
            "--sections",
            f"{path}",
        ]
        output = await _llvm.run("llvm-readobj", args, echo=self.verbose)
        # --elf-output-style=JSON is only *slightly* broken on Mach-O...
        output = output.replace("PrivateExtern\n", "\n")
        output = output.replace("Extern\n", "\n")
        # ...and also COFF. Keep only the text from the first "[" found after
        # the first character up to the last "]" found before the final
        # character:
        output = output[output.index("[", 1, None) :]
        output = output[: output.rindex("]", None, -1) + 1]
        sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output)
        for wrapped_section in sections:
            self._handle_section(wrapped_section["Section"], group)
        # The trampoline's entry point is just named "_ENTRY", since on some
        # platforms we later assume that any function starting with "_JIT_" uses
        # the GHC calling convention:
        entry_symbol = "_JIT_ENTRY" if "_JIT_ENTRY" in group.symbols else "_ENTRY"
        # The entry point must be the very first thing in the code:
        assert group.symbols[entry_symbol] == (_stencils.HoleValue.CODE, 0)
        if group.data.body:
            # Data has no real disassembly, so show its bytes literal instead:
            line = f"0: {str(bytes(group.data.body)).removeprefix('b')}"
            group.data.disassembly.append(line)
        group.process_relocations(alignment=self.alignment)
        return group

    def _handle_section(self, section: _S, group: _stencils.StencilGroup) -> None:
        """Add one parsed section to the group (format-specific, overridden)."""
        raise NotImplementedError(type(self))

    def _handle_relocation(
        self, base: int, relocation: _R, raw: bytes
    ) -> _stencils.Hole:
        """Turn one parsed relocation into a Hole (format-specific, overridden)."""
        raise NotImplementedError(type(self))

    async def _compile(
        self, opname: str, c: pathlib.Path, tempdir: pathlib.Path
    ) -> _stencils.StencilGroup:
        """Compile the C file for one opname and parse the resulting object.

        Intermediate files (and the object file itself) are written to tempdir.
        """
        # "Compile" the trampoline to an empty stencil group if it's not needed:
        if opname == "trampoline" and not self.ghccc:
            return _stencils.StencilGroup()
        o = tempdir / f"{opname}.o"
        args = [
            f"--target={self.triple}",
            "-DPy_BUILD_CORE_MODULE",
            "-D_DEBUG" if self.debug else "-DNDEBUG",
            f"-D_JIT_OPCODE={opname}",
            "-D_PyJIT_ACTIVE",
            "-D_Py_JIT",
            "-I.",
            f"-I{CPYTHON / 'Include'}",
            f"-I{CPYTHON / 'Include' / 'internal'}",
            f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}",
            f"-I{CPYTHON / 'Python'}",
            "-O3",
            "-c",
            # This debug info isn't necessary, and bloats out the JIT'ed code.
            # We *may* be able to re-enable this, process it, and JIT it for a
            # nicer debugging experience... but that needs a lot more research:
            "-fno-asynchronous-unwind-tables",
            # Don't call built-in functions that we can't find or patch:
            "-fno-builtin",
            # Emit relaxable 64-bit calls/jumps, so we don't have to worry
            # about emitting in-range trampolines for out-of-range targets.
            # We can probably remove this and emit trampolines in the future:
            "-fno-plt",
            # Don't call stack-smashing canaries that we can't find or patch:
            "-fno-stack-protector",
            "-std=c11",
            *self.args,
        ]
        if self.ghccc:
            # This is a bit of an ugly workaround, but it makes the code much
            # smaller and faster, so it's worth it. We want to use the GHC
            # calling convention, but Clang doesn't support it. So, we *first*
            # compile the code to LLVM IR, perform some text replacements on the
            # IR to change the calling convention(!), and then compile *that*.
            # Once we have access to Clang 19, we can get rid of this and use
            # __attribute__((preserve_none)) directly in the C code instead:
            ll = tempdir / f"{opname}.ll"
            args_ll = args + [
                # -fomit-frame-pointer is necessary because the GHC calling
                # convention uses RBP to pass arguments:
                "-S",
                "-emit-llvm",
                "-fomit-frame-pointer",
                "-o",
                f"{ll}",
                f"{c}",
            ]
            await _llvm.run("clang", args_ll, echo=self.verbose)
            ir = ll.read_text()
            # This handles declarations, definitions, and calls to named symbols
            # starting with "_JIT_":
            ir = re.sub(
                r"(((noalias|nonnull|noundef) )*ptr @_JIT_\w+\()", r"ghccc \1", ir
            )
            # This handles calls to anonymous callees, since anything with
            # "musttail" needs to use the same calling convention:
            ir = ir.replace("musttail call", "musttail call ghccc")
            # Sometimes *both* replacements happen at the same site, so fix it:
            ir = ir.replace("ghccc ghccc", "ghccc")
            ll.write_text(ir)
            args_o = args + ["-Wno-unused-command-line-argument", "-o", f"{o}", f"{ll}"]
        else:
            args_o = args + ["-o", f"{o}", f"{c}"]
        await _llvm.run("clang", args_o, echo=self.verbose)
        return await self._parse(o)

    async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
        """Compile the trampoline and every opcode, keyed by name.

        Opcode names are taken from the "case" labels of the generated
        executor cases. Each one is compiled in its own task, with all of the
        tasks sharing a single temporary working directory.
        """
        generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
        opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases))
        tasks = []
        with tempfile.TemporaryDirectory() as tempdir:
            work = pathlib.Path(tempdir).resolve()
            # Leaving the TaskGroup waits for every task, so the temporary
            # directory is only cleaned up once all of the compiles are done:
            async with asyncio.TaskGroup() as group:
                coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work)
                tasks.append(group.create_task(coro, name="trampoline"))
                for opname in opnames:
                    coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work)
                    tasks.append(group.create_task(coro, name=opname))
        return {task.get_name(): task.result() for task in tasks}

    def build(
        self, out: pathlib.Path, *, comment: str = "", force: bool = False
    ) -> None:
        """Build jit_stencils.h in the given directory.

        The first line of the header is a digest of its inputs. If an existing
        header already starts with the current digest, nothing is rebuilt
        unless force is true. When given, comment is written as a second
        comment line, right after the digest.
        """
        if not self.stable:
            warning = f"JIT support for {self.triple} is still experimental!"
            request = "Please report any issues you encounter.".center(len(warning))
            outline = "=" * len(warning)
            print("\n".join(["", outline, warning, request, outline, ""]))
        digest = f"// {self._compute_digest(out)}\n"
        jit_stencils = out / "jit_stencils.h"
        if (
            not force
            and jit_stencils.exists()
            and jit_stencils.read_text().startswith(digest)
        ):
            return
        stencil_groups = asyncio.run(self._build_stencils())
        # Write to a separate file first, then move it into place, so that the
        # final header is never left half-written:
        jit_stencils_new = out / "jit_stencils.h.new"
        try:
            with jit_stencils_new.open("w") as file:
                file.write(digest)
                if comment:
                    file.write(f"// {comment}\n")
                file.write("\n")
                for line in _writer.dump(stencil_groups):
                    file.write(f"{line}\n")
            try:
                jit_stencils_new.replace(jit_stencils)
            except FileNotFoundError:
                # another process probably already moved the file
                if not jit_stencils.is_file():
                    raise
        finally:
            jit_stencils_new.unlink(missing_ok=True)
232
233
234class _COFF(
235    _Target[_schema.COFFSection, _schema.COFFRelocation]
236):  # pylint: disable = too-few-public-methods
237    def _handle_section(
238        self, section: _schema.COFFSection, group: _stencils.StencilGroup
239    ) -> None:
240        flags = {flag["Name"] for flag in section["Characteristics"]["Flags"]}
241        if "SectionData" in section:
242            section_data_bytes = section["SectionData"]["Bytes"]
243        else:
244            # Zeroed BSS data, seen with printf debugging calls:
245            section_data_bytes = [0] * section["RawDataSize"]
246        if "IMAGE_SCN_MEM_EXECUTE" in flags:
247            value = _stencils.HoleValue.CODE
248            stencil = group.code
249        elif "IMAGE_SCN_MEM_READ" in flags:
250            value = _stencils.HoleValue.DATA
251            stencil = group.data
252        else:
253            return
254        base = len(stencil.body)
255        group.symbols[section["Number"]] = value, base
256        stencil.body.extend(section_data_bytes)
257        for wrapped_symbol in section["Symbols"]:
258            symbol = wrapped_symbol["Symbol"]
259            offset = base + symbol["Value"]
260            name = symbol["Name"]
261            name = name.removeprefix(self.prefix)
262            if name not in group.symbols:
263                group.symbols[name] = value, offset
264        for wrapped_relocation in section["Relocations"]:
265            relocation = wrapped_relocation["Relocation"]
266            hole = self._handle_relocation(base, relocation, stencil.body)
267            stencil.holes.append(hole)
268
269    def _unwrap_dllimport(self, name: str) -> tuple[_stencils.HoleValue, str | None]:
270        if name.startswith("__imp_"):
271            name = name.removeprefix("__imp_")
272            name = name.removeprefix(self.prefix)
273            return _stencils.HoleValue.GOT, name
274        name = name.removeprefix(self.prefix)
275        return _stencils.symbol_to_value(name)
276
277    def _handle_relocation(
278        self, base: int, relocation: _schema.COFFRelocation, raw: bytes
279    ) -> _stencils.Hole:
280        match relocation:
281            case {
282                "Offset": offset,
283                "Symbol": s,
284                "Type": {"Name": "IMAGE_REL_I386_DIR32" as kind},
285            }:
286                offset += base
287                value, symbol = self._unwrap_dllimport(s)
288                addend = int.from_bytes(raw[offset : offset + 4], "little")
289            case {
290                "Offset": offset,
291                "Symbol": s,
292                "Type": {
293                    "Name": "IMAGE_REL_AMD64_REL32" | "IMAGE_REL_I386_REL32" as kind
294                },
295            }:
296                offset += base
297                value, symbol = self._unwrap_dllimport(s)
298                addend = (
299                    int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4
300                )
301            case {
302                "Offset": offset,
303                "Symbol": s,
304                "Type": {
305                    "Name": "IMAGE_REL_ARM64_BRANCH26"
306                    | "IMAGE_REL_ARM64_PAGEBASE_REL21"
307                    | "IMAGE_REL_ARM64_PAGEOFFSET_12A"
308                    | "IMAGE_REL_ARM64_PAGEOFFSET_12L" as kind
309                },
310            }:
311                offset += base
312                value, symbol = self._unwrap_dllimport(s)
313                addend = 0
314            case _:
315                raise NotImplementedError(relocation)
316        return _stencils.Hole(offset, kind, value, symbol, addend)
317
318
319class _ELF(
320    _Target[_schema.ELFSection, _schema.ELFRelocation]
321):  # pylint: disable = too-few-public-methods
322    def _handle_section(
323        self, section: _schema.ELFSection, group: _stencils.StencilGroup
324    ) -> None:
325        section_type = section["Type"]["Name"]
326        flags = {flag["Name"] for flag in section["Flags"]["Flags"]}
327        if section_type == "SHT_RELA":
328            assert "SHF_INFO_LINK" in flags, flags
329            assert not section["Symbols"]
330            value, base = group.symbols[section["Info"]]
331            if value is _stencils.HoleValue.CODE:
332                stencil = group.code
333            else:
334                assert value is _stencils.HoleValue.DATA
335                stencil = group.data
336            for wrapped_relocation in section["Relocations"]:
337                relocation = wrapped_relocation["Relocation"]
338                hole = self._handle_relocation(base, relocation, stencil.body)
339                stencil.holes.append(hole)
340        elif section_type == "SHT_PROGBITS":
341            if "SHF_ALLOC" not in flags:
342                return
343            if "SHF_EXECINSTR" in flags:
344                value = _stencils.HoleValue.CODE
345                stencil = group.code
346            else:
347                value = _stencils.HoleValue.DATA
348                stencil = group.data
349            group.symbols[section["Index"]] = value, len(stencil.body)
350            for wrapped_symbol in section["Symbols"]:
351                symbol = wrapped_symbol["Symbol"]
352                offset = len(stencil.body) + symbol["Value"]
353                name = symbol["Name"]["Name"]
354                name = name.removeprefix(self.prefix)
355                group.symbols[name] = value, offset
356            stencil.body.extend(section["SectionData"]["Bytes"])
357            assert not section["Relocations"]
358        else:
359            assert section_type in {
360                "SHT_GROUP",
361                "SHT_LLVM_ADDRSIG",
362                "SHT_NOTE",
363                "SHT_NULL",
364                "SHT_STRTAB",
365                "SHT_SYMTAB",
366            }, section_type
367
368    def _handle_relocation(
369        self, base: int, relocation: _schema.ELFRelocation, raw: bytes
370    ) -> _stencils.Hole:
371        symbol: str | None
372        match relocation:
373            case {
374                "Addend": addend,
375                "Offset": offset,
376                "Symbol": {"Name": s},
377                "Type": {
378                    "Name": "R_AARCH64_ADR_GOT_PAGE"
379                    | "R_AARCH64_LD64_GOT_LO12_NC"
380                    | "R_X86_64_GOTPCREL"
381                    | "R_X86_64_GOTPCRELX"
382                    | "R_X86_64_REX_GOTPCRELX" as kind
383                },
384            }:
385                offset += base
386                s = s.removeprefix(self.prefix)
387                value, symbol = _stencils.HoleValue.GOT, s
388            case {
389                "Addend": addend,
390                "Offset": offset,
391                "Symbol": {"Name": s},
392                "Type": {"Name": kind},
393            }:
394                offset += base
395                s = s.removeprefix(self.prefix)
396                value, symbol = _stencils.symbol_to_value(s)
397            case _:
398                raise NotImplementedError(relocation)
399        return _stencils.Hole(offset, kind, value, symbol, addend)
400
401
402class _MachO(
403    _Target[_schema.MachOSection, _schema.MachORelocation]
404):  # pylint: disable = too-few-public-methods
405    def _handle_section(
406        self, section: _schema.MachOSection, group: _stencils.StencilGroup
407    ) -> None:
408        assert section["Address"] >= len(group.code.body)
409        assert "SectionData" in section
410        flags = {flag["Name"] for flag in section["Attributes"]["Flags"]}
411        name = section["Name"]["Value"]
412        name = name.removeprefix(self.prefix)
413        if "Debug" in flags:
414            return
415        if "SomeInstructions" in flags:
416            value = _stencils.HoleValue.CODE
417            stencil = group.code
418            start_address = 0
419            group.symbols[name] = value, section["Address"] - start_address
420        else:
421            value = _stencils.HoleValue.DATA
422            stencil = group.data
423            start_address = len(group.code.body)
424            group.symbols[name] = value, len(group.code.body)
425        base = section["Address"] - start_address
426        group.symbols[section["Index"]] = value, base
427        stencil.body.extend(
428            [0] * (section["Address"] - len(group.code.body) - len(group.data.body))
429        )
430        stencil.body.extend(section["SectionData"]["Bytes"])
431        assert "Symbols" in section
432        for wrapped_symbol in section["Symbols"]:
433            symbol = wrapped_symbol["Symbol"]
434            offset = symbol["Value"] - start_address
435            name = symbol["Name"]["Name"]
436            name = name.removeprefix(self.prefix)
437            group.symbols[name] = value, offset
438        assert "Relocations" in section
439        for wrapped_relocation in section["Relocations"]:
440            relocation = wrapped_relocation["Relocation"]
441            hole = self._handle_relocation(base, relocation, stencil.body)
442            stencil.holes.append(hole)
443
444    def _handle_relocation(
445        self, base: int, relocation: _schema.MachORelocation, raw: bytes
446    ) -> _stencils.Hole:
447        symbol: str | None
448        match relocation:
449            case {
450                "Offset": offset,
451                "Symbol": {"Name": s},
452                "Type": {
453                    "Name": "ARM64_RELOC_GOT_LOAD_PAGE21"
454                    | "ARM64_RELOC_GOT_LOAD_PAGEOFF12" as kind
455                },
456            }:
457                offset += base
458                s = s.removeprefix(self.prefix)
459                value, symbol = _stencils.HoleValue.GOT, s
460                addend = 0
461            case {
462                "Offset": offset,
463                "Symbol": {"Name": s},
464                "Type": {"Name": "X86_64_RELOC_GOT" | "X86_64_RELOC_GOT_LOAD" as kind},
465            }:
466                offset += base
467                s = s.removeprefix(self.prefix)
468                value, symbol = _stencils.HoleValue.GOT, s
469                addend = (
470                    int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4
471                )
472            case {
473                "Offset": offset,
474                "Section": {"Name": s},
475                "Type": {"Name": "X86_64_RELOC_SIGNED" as kind},
476            } | {
477                "Offset": offset,
478                "Symbol": {"Name": s},
479                "Type": {"Name": "X86_64_RELOC_BRANCH" | "X86_64_RELOC_SIGNED" as kind},
480            }:
481                offset += base
482                s = s.removeprefix(self.prefix)
483                value, symbol = _stencils.symbol_to_value(s)
484                addend = (
485                    int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4
486                )
487            case {
488                "Offset": offset,
489                "Section": {"Name": s},
490                "Type": {"Name": kind},
491            } | {
492                "Offset": offset,
493                "Symbol": {"Name": s},
494                "Type": {"Name": kind},
495            }:
496                offset += base
497                s = s.removeprefix(self.prefix)
498                value, symbol = _stencils.symbol_to_value(s)
499                addend = 0
500            case _:
501                raise NotImplementedError(relocation)
502        return _stencils.Hole(offset, kind, value, symbol, addend)
503
504
def get_target(host: str) -> _COFF | _ELF | _MachO:
    """Return the target configured for the given host "triple".

    The host is matched, in full, against the known triple patterns in order.
    A host that matches none of them raises ValueError.
    """

    def host_is(pattern: str) -> bool:
        # True when the *entire* host string matches the pattern.
        return re.fullmatch(pattern, host) is not None

    # ghccc currently crashes Clang when combined with musttail on aarch64. :(
    if host_is(r"aarch64-apple-darwin.*"):
        return _MachO(host, alignment=8, prefix="_")
    if host_is(r"aarch64-pc-windows-msvc"):
        return _COFF(host, alignment=8, args=["-fms-runtime-lib=dll"])
    if host_is(r"aarch64-.*-linux-gnu"):
        return _ELF(host, alignment=8, args=["-fpic"])
    if host_is(r"i686-pc-windows-msvc"):
        return _COFF(host, args=["-DPy_NO_ENABLE_SHARED"], ghccc=True, prefix="_")
    if host_is(r"x86_64-apple-darwin.*"):
        return _MachO(host, ghccc=True, prefix="_")
    if host_is(r"x86_64-pc-windows-msvc"):
        return _COFF(host, args=["-fms-runtime-lib=dll"], ghccc=True)
    if host_is(r"x86_64-.*-linux-gnu"):
        return _ELF(host, args=["-fpic"], ghccc=True)
    raise ValueError(host)
531