1"""Target-specific code generation, parsing, and processing.""" 2 3import asyncio 4import dataclasses 5import hashlib 6import json 7import os 8import pathlib 9import re 10import sys 11import tempfile 12import typing 13 14import _llvm 15import _schema 16import _stencils 17import _writer 18 19if sys.version_info < (3, 11): 20 raise RuntimeError("Building the JIT compiler requires Python 3.11 or newer!") 21 22TOOLS_JIT_BUILD = pathlib.Path(__file__).resolve() 23TOOLS_JIT = TOOLS_JIT_BUILD.parent 24TOOLS = TOOLS_JIT.parent 25CPYTHON = TOOLS.parent 26PYTHON_EXECUTOR_CASES_C_H = CPYTHON / "Python" / "executor_cases.c.h" 27TOOLS_JIT_TEMPLATE_C = TOOLS_JIT / "template.c" 28 29 30_S = typing.TypeVar("_S", _schema.COFFSection, _schema.ELFSection, _schema.MachOSection) 31_R = typing.TypeVar( 32 "_R", _schema.COFFRelocation, _schema.ELFRelocation, _schema.MachORelocation 33) 34 35 36@dataclasses.dataclass 37class _Target(typing.Generic[_S, _R]): 38 triple: str 39 _: dataclasses.KW_ONLY 40 alignment: int = 1 41 args: typing.Sequence[str] = () 42 ghccc: bool = False 43 prefix: str = "" 44 stable: bool = False 45 debug: bool = False 46 verbose: bool = False 47 48 def _compute_digest(self, out: pathlib.Path) -> str: 49 hasher = hashlib.sha256() 50 hasher.update(self.triple.encode()) 51 hasher.update(self.debug.to_bytes()) 52 # These dependencies are also reflected in _JITSources in regen.targets: 53 hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) 54 hasher.update((out / "pyconfig.h").read_bytes()) 55 for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)): 56 for filename in filenames: 57 hasher.update(pathlib.Path(dirpath, filename).read_bytes()) 58 return hasher.hexdigest() 59 60 async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: 61 group = _stencils.StencilGroup() 62 args = ["--disassemble", "--reloc", f"{path}"] 63 output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose) 64 if output is not None: 65 group.code.disassembly.extend( 66 line.expandtabs().strip() 67 for line in output.splitlines() 68 if not line.isspace() 69 ) 70 args = [ 71 "--elf-output-style=JSON", 72 "--expand-relocs", 73 # "--pretty-print", 74 "--section-data", 75 "--section-relocations", 76 "--section-symbols", 77 "--sections", 78 f"{path}", 79 ] 80 output = await _llvm.run("llvm-readobj", args, echo=self.verbose) 81 # --elf-output-style=JSON is only *slightly* broken on Mach-O... 82 output = output.replace("PrivateExtern\n", "\n") 83 output = output.replace("Extern\n", "\n") 84 # ...and also COFF: 85 output = output[output.index("[", 1, None) :] 86 output = output[: output.rindex("]", None, -1) + 1] 87 sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output) 88 for wrapped_section in sections: 89 self._handle_section(wrapped_section["Section"], group) 90 # The trampoline's entry point is just named "_ENTRY", since on some 91 # platforms we later assume that any function starting with "_JIT_" uses 92 # the GHC calling convention: 93 entry_symbol = "_JIT_ENTRY" if "_JIT_ENTRY" in group.symbols else "_ENTRY" 94 assert group.symbols[entry_symbol] == (_stencils.HoleValue.CODE, 0) 95 if group.data.body: 96 line = f"0: {str(bytes(group.data.body)).removeprefix('b')}" 97 group.data.disassembly.append(line) 98 group.process_relocations(alignment=self.alignment) 99 return group 100 101 def _handle_section(self, section: _S, group: _stencils.StencilGroup) -> None: 102 raise NotImplementedError(type(self)) 103 104 def _handle_relocation( 105 self, base: int, relocation: _R, raw: bytes 106 ) -> _stencils.Hole: 107 raise NotImplementedError(type(self)) 108 109 async def _compile( 110 self, opname: str, c: pathlib.Path, tempdir: pathlib.Path 111 ) -> _stencils.StencilGroup: 112 # "Compile" the trampoline to an empty stencil group if it's not needed: 113 if opname == "trampoline" and not self.ghccc: 114 return _stencils.StencilGroup() 115 o = tempdir / f"{opname}.o" 116 args = [ 117 f"--target={self.triple}", 118 "-DPy_BUILD_CORE_MODULE", 119 "-D_DEBUG" if self.debug else "-DNDEBUG", 120 f"-D_JIT_OPCODE={opname}", 121 "-D_PyJIT_ACTIVE", 122 "-D_Py_JIT", 123 "-I.", 124 f"-I{CPYTHON / 'Include'}", 125 f"-I{CPYTHON / 'Include' / 'internal'}", 126 f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}", 127 f"-I{CPYTHON / 'Python'}", 128 "-O3", 129 "-c", 130 # This debug info isn't necessary, and bloats out the JIT'ed code. 131 # We *may* be able to re-enable this, process it, and JIT it for a 132 # nicer debugging experience... but that needs a lot more research: 133 "-fno-asynchronous-unwind-tables", 134 # Don't call built-in functions that we can't find or patch: 135 "-fno-builtin", 136 # Emit relaxable 64-bit calls/jumps, so we don't have to worry about 137 # about emitting in-range trampolines for out-of-range targets. 138 # We can probably remove this and emit trampolines in the future: 139 "-fno-plt", 140 # Don't call stack-smashing canaries that we can't find or patch: 141 "-fno-stack-protector", 142 "-std=c11", 143 *self.args, 144 ] 145 if self.ghccc: 146 # This is a bit of an ugly workaround, but it makes the code much 147 # smaller and faster, so it's worth it. We want to use the GHC 148 # calling convention, but Clang doesn't support it. So, we *first* 149 # compile the code to LLVM IR, perform some text replacements on the 150 # IR to change the calling convention(!), and then compile *that*. 151 # Once we have access to Clang 19, we can get rid of this and use 152 # __attribute__((preserve_none)) directly in the C code instead: 153 ll = tempdir / f"{opname}.ll" 154 args_ll = args + [ 155 # -fomit-frame-pointer is necessary because the GHC calling 156 # convention uses RBP to pass arguments: 157 "-S", 158 "-emit-llvm", 159 "-fomit-frame-pointer", 160 "-o", 161 f"{ll}", 162 f"{c}", 163 ] 164 await _llvm.run("clang", args_ll, echo=self.verbose) 165 ir = ll.read_text() 166 # This handles declarations, definitions, and calls to named symbols 167 # starting with "_JIT_": 168 ir = re.sub( 169 r"(((noalias|nonnull|noundef) )*ptr @_JIT_\w+\()", r"ghccc \1", ir 170 ) 171 # This handles calls to anonymous callees, since anything with 172 # "musttail" needs to use the same calling convention: 173 ir = ir.replace("musttail call", "musttail call ghccc") 174 # Sometimes *both* replacements happen at the same site, so fix it: 175 ir = ir.replace("ghccc ghccc", "ghccc") 176 ll.write_text(ir) 177 args_o = args + ["-Wno-unused-command-line-argument", "-o", f"{o}", f"{ll}"] 178 else: 179 args_o = args + ["-o", f"{o}", f"{c}"] 180 await _llvm.run("clang", args_o, echo=self.verbose) 181 return await self._parse(o) 182 183 async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: 184 generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text() 185 opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases)) 186 tasks = [] 187 with tempfile.TemporaryDirectory() as tempdir: 188 work = pathlib.Path(tempdir).resolve() 189 async with asyncio.TaskGroup() as group: 190 coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work) 191 tasks.append(group.create_task(coro, name="trampoline")) 192 for opname in opnames: 193 coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work) 194 tasks.append(group.create_task(coro, name=opname)) 195 return {task.get_name(): task.result() for task in tasks} 196 197 def build( 198 self, out: pathlib.Path, *, comment: str = "", force: bool = False 199 ) -> None: 200 """Build jit_stencils.h in the given directory.""" 201 if not self.stable: 202 warning = f"JIT support for {self.triple} is still experimental!" 203 request = "Please report any issues you encounter.".center(len(warning)) 204 outline = "=" * len(warning) 205 print("\n".join(["", outline, warning, request, outline, ""])) 206 digest = f"// {self._compute_digest(out)}\n" 207 jit_stencils = out / "jit_stencils.h" 208 if ( 209 not force 210 and jit_stencils.exists() 211 and jit_stencils.read_text().startswith(digest) 212 ): 213 return 214 stencil_groups = asyncio.run(self._build_stencils()) 215 jit_stencils_new = out / "jit_stencils.h.new" 216 try: 217 with jit_stencils_new.open("w") as file: 218 file.write(digest) 219 if comment: 220 file.write(f"// {comment}\n") 221 file.write("\n") 222 for line in _writer.dump(stencil_groups): 223 file.write(f"{line}\n") 224 try: 225 jit_stencils_new.replace(jit_stencils) 226 except FileNotFoundError: 227 # another process probably already moved the file 228 if not jit_stencils.is_file(): 229 raise 230 finally: 231 jit_stencils_new.unlink(missing_ok=True) 232 233 234class _COFF( 235 _Target[_schema.COFFSection, _schema.COFFRelocation] 236): # pylint: disable = too-few-public-methods 237 def _handle_section( 238 self, section: _schema.COFFSection, group: _stencils.StencilGroup 239 ) -> None: 240 flags = {flag["Name"] for flag in section["Characteristics"]["Flags"]} 241 if "SectionData" in section: 242 section_data_bytes = section["SectionData"]["Bytes"] 243 else: 244 # Zeroed BSS data, seen with printf debugging calls: 245 section_data_bytes = [0] * section["RawDataSize"] 246 if "IMAGE_SCN_MEM_EXECUTE" in flags: 247 value = _stencils.HoleValue.CODE 248 stencil = group.code 249 elif "IMAGE_SCN_MEM_READ" in flags: 250 value = _stencils.HoleValue.DATA 251 stencil = group.data 252 else: 253 return 254 base = len(stencil.body) 255 group.symbols[section["Number"]] = value, base 256 stencil.body.extend(section_data_bytes) 257 for wrapped_symbol in section["Symbols"]: 258 symbol = wrapped_symbol["Symbol"] 259 offset = base + symbol["Value"] 260 name = symbol["Name"] 261 name = name.removeprefix(self.prefix) 262 if name not in group.symbols: 263 group.symbols[name] = value, offset 264 for wrapped_relocation in section["Relocations"]: 265 relocation = wrapped_relocation["Relocation"] 266 hole = self._handle_relocation(base, relocation, stencil.body) 267 stencil.holes.append(hole) 268 269 def _unwrap_dllimport(self, name: str) -> tuple[_stencils.HoleValue, str | None]: 270 if name.startswith("__imp_"): 271 name = name.removeprefix("__imp_") 272 name = name.removeprefix(self.prefix) 273 return _stencils.HoleValue.GOT, name 274 name = name.removeprefix(self.prefix) 275 return _stencils.symbol_to_value(name) 276 277 def _handle_relocation( 278 self, base: int, relocation: _schema.COFFRelocation, raw: bytes 279 ) -> _stencils.Hole: 280 match relocation: 281 case { 282 "Offset": offset, 283 "Symbol": s, 284 "Type": {"Name": "IMAGE_REL_I386_DIR32" as kind}, 285 }: 286 offset += base 287 value, symbol = self._unwrap_dllimport(s) 288 addend = int.from_bytes(raw[offset : offset + 4], "little") 289 case { 290 "Offset": offset, 291 "Symbol": s, 292 "Type": { 293 "Name": "IMAGE_REL_AMD64_REL32" | "IMAGE_REL_I386_REL32" as kind 294 }, 295 }: 296 offset += base 297 value, symbol = self._unwrap_dllimport(s) 298 addend = ( 299 int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 300 ) 301 case { 302 "Offset": offset, 303 "Symbol": s, 304 "Type": { 305 "Name": "IMAGE_REL_ARM64_BRANCH26" 306 | "IMAGE_REL_ARM64_PAGEBASE_REL21" 307 | "IMAGE_REL_ARM64_PAGEOFFSET_12A" 308 | "IMAGE_REL_ARM64_PAGEOFFSET_12L" as kind 309 }, 310 }: 311 offset += base 312 value, symbol = self._unwrap_dllimport(s) 313 addend = 0 314 case _: 315 raise NotImplementedError(relocation) 316 return _stencils.Hole(offset, kind, value, symbol, addend) 317 318 319class _ELF( 320 _Target[_schema.ELFSection, _schema.ELFRelocation] 321): # pylint: disable = too-few-public-methods 322 def _handle_section( 323 self, section: _schema.ELFSection, group: _stencils.StencilGroup 324 ) -> None: 325 section_type = section["Type"]["Name"] 326 flags = {flag["Name"] for flag in section["Flags"]["Flags"]} 327 if section_type == "SHT_RELA": 328 assert "SHF_INFO_LINK" in flags, flags 329 assert not section["Symbols"] 330 value, base = group.symbols[section["Info"]] 331 if value is _stencils.HoleValue.CODE: 332 stencil = group.code 333 else: 334 assert value is _stencils.HoleValue.DATA 335 stencil = group.data 336 for wrapped_relocation in section["Relocations"]: 337 relocation = wrapped_relocation["Relocation"] 338 hole = self._handle_relocation(base, relocation, stencil.body) 339 stencil.holes.append(hole) 340 elif section_type == "SHT_PROGBITS": 341 if "SHF_ALLOC" not in flags: 342 return 343 if "SHF_EXECINSTR" in flags: 344 value = _stencils.HoleValue.CODE 345 stencil = group.code 346 else: 347 value = _stencils.HoleValue.DATA 348 stencil = group.data 349 group.symbols[section["Index"]] = value, len(stencil.body) 350 for wrapped_symbol in section["Symbols"]: 351 symbol = wrapped_symbol["Symbol"] 352 offset = len(stencil.body) + symbol["Value"] 353 name = symbol["Name"]["Name"] 354 name = name.removeprefix(self.prefix) 355 group.symbols[name] = value, offset 356 stencil.body.extend(section["SectionData"]["Bytes"]) 357 assert not section["Relocations"] 358 else: 359 assert section_type in { 360 "SHT_GROUP", 361 "SHT_LLVM_ADDRSIG", 362 "SHT_NOTE", 363 "SHT_NULL", 364 "SHT_STRTAB", 365 "SHT_SYMTAB", 366 }, section_type 367 368 def _handle_relocation( 369 self, base: int, relocation: _schema.ELFRelocation, raw: bytes 370 ) -> _stencils.Hole: 371 symbol: str | None 372 match relocation: 373 case { 374 "Addend": addend, 375 "Offset": offset, 376 "Symbol": {"Name": s}, 377 "Type": { 378 "Name": "R_AARCH64_ADR_GOT_PAGE" 379 | "R_AARCH64_LD64_GOT_LO12_NC" 380 | "R_X86_64_GOTPCREL" 381 | "R_X86_64_GOTPCRELX" 382 | "R_X86_64_REX_GOTPCRELX" as kind 383 }, 384 }: 385 offset += base 386 s = s.removeprefix(self.prefix) 387 value, symbol = _stencils.HoleValue.GOT, s 388 case { 389 "Addend": addend, 390 "Offset": offset, 391 "Symbol": {"Name": s}, 392 "Type": {"Name": kind}, 393 }: 394 offset += base 395 s = s.removeprefix(self.prefix) 396 value, symbol = _stencils.symbol_to_value(s) 397 case _: 398 raise NotImplementedError(relocation) 399 return _stencils.Hole(offset, kind, value, symbol, addend) 400 401 402class _MachO( 403 _Target[_schema.MachOSection, _schema.MachORelocation] 404): # pylint: disable = too-few-public-methods 405 def _handle_section( 406 self, section: _schema.MachOSection, group: _stencils.StencilGroup 407 ) -> None: 408 assert section["Address"] >= len(group.code.body) 409 assert "SectionData" in section 410 flags = {flag["Name"] for flag in section["Attributes"]["Flags"]} 411 name = section["Name"]["Value"] 412 name = name.removeprefix(self.prefix) 413 if "Debug" in flags: 414 return 415 if "SomeInstructions" in flags: 416 value = _stencils.HoleValue.CODE 417 stencil = group.code 418 start_address = 0 419 group.symbols[name] = value, section["Address"] - start_address 420 else: 421 value = _stencils.HoleValue.DATA 422 stencil = group.data 423 start_address = len(group.code.body) 424 group.symbols[name] = value, len(group.code.body) 425 base = section["Address"] - start_address 426 group.symbols[section["Index"]] = value, base 427 stencil.body.extend( 428 [0] * (section["Address"] - len(group.code.body) - len(group.data.body)) 429 ) 430 stencil.body.extend(section["SectionData"]["Bytes"]) 431 assert "Symbols" in section 432 for wrapped_symbol in section["Symbols"]: 433 symbol = wrapped_symbol["Symbol"] 434 offset = symbol["Value"] - start_address 435 name = symbol["Name"]["Name"] 436 name = name.removeprefix(self.prefix) 437 group.symbols[name] = value, offset 438 assert "Relocations" in section 439 for wrapped_relocation in section["Relocations"]: 440 relocation = wrapped_relocation["Relocation"] 441 hole = self._handle_relocation(base, relocation, stencil.body) 442 stencil.holes.append(hole) 443 444 def _handle_relocation( 445 self, base: int, relocation: _schema.MachORelocation, raw: bytes 446 ) -> _stencils.Hole: 447 symbol: str | None 448 match relocation: 449 case { 450 "Offset": offset, 451 "Symbol": {"Name": s}, 452 "Type": { 453 "Name": "ARM64_RELOC_GOT_LOAD_PAGE21" 454 | "ARM64_RELOC_GOT_LOAD_PAGEOFF12" as kind 455 }, 456 }: 457 offset += base 458 s = s.removeprefix(self.prefix) 459 value, symbol = _stencils.HoleValue.GOT, s 460 addend = 0 461 case { 462 "Offset": offset, 463 "Symbol": {"Name": s}, 464 "Type": {"Name": "X86_64_RELOC_GOT" | "X86_64_RELOC_GOT_LOAD" as kind}, 465 }: 466 offset += base 467 s = s.removeprefix(self.prefix) 468 value, symbol = _stencils.HoleValue.GOT, s 469 addend = ( 470 int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 471 ) 472 case { 473 "Offset": offset, 474 "Section": {"Name": s}, 475 "Type": {"Name": "X86_64_RELOC_SIGNED" as kind}, 476 } | { 477 "Offset": offset, 478 "Symbol": {"Name": s}, 479 "Type": {"Name": "X86_64_RELOC_BRANCH" | "X86_64_RELOC_SIGNED" as kind}, 480 }: 481 offset += base 482 s = s.removeprefix(self.prefix) 483 value, symbol = _stencils.symbol_to_value(s) 484 addend = ( 485 int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 486 ) 487 case { 488 "Offset": offset, 489 "Section": {"Name": s}, 490 "Type": {"Name": kind}, 491 } | { 492 "Offset": offset, 493 "Symbol": {"Name": s}, 494 "Type": {"Name": kind}, 495 }: 496 offset += base 497 s = s.removeprefix(self.prefix) 498 value, symbol = _stencils.symbol_to_value(s) 499 addend = 0 500 case _: 501 raise NotImplementedError(relocation) 502 return _stencils.Hole(offset, kind, value, symbol, addend) 503 504 505def get_target(host: str) -> _COFF | _ELF | _MachO: 506 """Build a _Target for the given host "triple" and options.""" 507 # ghccc currently crashes Clang when combined with musttail on aarch64. :( 508 target: _COFF | _ELF | _MachO 509 if re.fullmatch(r"aarch64-apple-darwin.*", host): 510 target = _MachO(host, alignment=8, prefix="_") 511 elif re.fullmatch(r"aarch64-pc-windows-msvc", host): 512 args = ["-fms-runtime-lib=dll"] 513 target = _COFF(host, alignment=8, args=args) 514 elif re.fullmatch(r"aarch64-.*-linux-gnu", host): 515 args = ["-fpic"] 516 target = _ELF(host, alignment=8, args=args) 517 elif re.fullmatch(r"i686-pc-windows-msvc", host): 518 args = ["-DPy_NO_ENABLE_SHARED"] 519 target = _COFF(host, args=args, ghccc=True, prefix="_") 520 elif re.fullmatch(r"x86_64-apple-darwin.*", host): 521 target = _MachO(host, ghccc=True, prefix="_") 522 elif re.fullmatch(r"x86_64-pc-windows-msvc", host): 523 args = ["-fms-runtime-lib=dll"] 524 target = _COFF(host, args=args, ghccc=True) 525 elif re.fullmatch(r"x86_64-.*-linux-gnu", host): 526 args = ["-fpic"] 527 target = _ELF(host, args=args, ghccc=True) 528 else: 529 raise ValueError(host) 530 return target 531