1""" 2ELF file parser. 3 4This provides a class ``ELFFile`` that parses an ELF executable in a similar 5interface to ``ZipFile``. Only the read interface is implemented. 6 7Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca 8ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html 9""" 10 11import enum 12import os 13import struct 14from typing import IO, Optional, Tuple 15 16 17class ELFInvalid(ValueError): 18 pass 19 20 21class EIClass(enum.IntEnum): 22 C32 = 1 23 C64 = 2 24 25 26class EIData(enum.IntEnum): 27 Lsb = 1 28 Msb = 2 29 30 31class EMachine(enum.IntEnum): 32 I386 = 3 33 S390 = 22 34 Arm = 40 35 X8664 = 62 36 AArc64 = 183 37 38 39class ELFFile: 40 """ 41 Representation of an ELF executable. 42 """ 43 44 def __init__(self, f: IO[bytes]) -> None: 45 self._f = f 46 47 try: 48 ident = self._read("16B") 49 except struct.error: 50 raise ELFInvalid("unable to parse identification") 51 magic = bytes(ident[:4]) 52 if magic != b"\x7fELF": 53 raise ELFInvalid(f"invalid magic: {magic!r}") 54 55 self.capacity = ident[4] # Format for program header (bitness). 56 self.encoding = ident[5] # Data structure encoding (endianness). 57 58 try: 59 # e_fmt: Format for program header. 60 # p_fmt: Format for section header. 61 # p_idx: Indexes to find p_type, p_offset, and p_filesz. 62 e_fmt, self._p_fmt, self._p_idx = { 63 (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)), # 32-bit LSB. 64 (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB. 65 (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)), # 64-bit LSB. 66 (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB. 67 }[(self.capacity, self.encoding)] 68 except KeyError: 69 raise ELFInvalid( 70 f"unrecognized capacity ({self.capacity}) or " 71 f"encoding ({self.encoding})" 72 ) 73 74 try: 75 ( 76 _, 77 self.machine, # Architecture type. 78 _, 79 _, 80 self._e_phoff, # Offset of program header. 81 _, 82 self.flags, # Processor-specific flags. 83 _, 84 self._e_phentsize, # Size of section. 85 self._e_phnum, # Number of sections. 86 ) = self._read(e_fmt) 87 except struct.error as e: 88 raise ELFInvalid("unable to parse machine and section information") from e 89 90 def _read(self, fmt: str) -> Tuple[int, ...]: 91 return struct.unpack(fmt, self._f.read(struct.calcsize(fmt))) 92 93 @property 94 def interpreter(self) -> Optional[str]: 95 """ 96 The path recorded in the ``PT_INTERP`` section header. 97 """ 98 for index in range(self._e_phnum): 99 self._f.seek(self._e_phoff + self._e_phentsize * index) 100 try: 101 data = self._read(self._p_fmt) 102 except struct.error: 103 continue 104 if data[self._p_idx[0]] != 3: # Not PT_INTERP. 105 continue 106 self._f.seek(data[self._p_idx[1]]) 107 return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0") 108 return None 109