• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""
2ELF file parser.
3
4This provides a class ``ELFFile`` that parses an ELF executable in a similar
5interface to ``ZipFile``. Only the read interface is implemented.
6
7Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca
8ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html
9"""
10
11import enum
12import os
13import struct
14from typing import IO, Optional, Tuple
15
16
class ELFInvalid(ValueError):
    """
    Raised by ``ELFFile`` when the data cannot be parsed as an ELF file.
    """
19
20
class EIClass(enum.IntEnum):
    """
    File class codes from the ELF identification bytes (word size).
    """

    C32 = 1  # 32-bit objects.
    C64 = 2  # 64-bit objects.
24
25
class EIData(enum.IntEnum):
    """
    Data encoding codes from the ELF identification bytes (byte order).
    """

    Lsb = 1  # Least significant byte first (little-endian).
    Msb = 2  # Most significant byte first (big-endian).
29
30
class EMachine(enum.IntEnum):
    """
    Architecture codes as stored in the ``e_machine`` field of the ELF header.

    Only a handful of architectures are listed; member names are part of the
    public interface and are kept exactly as spelled.
    """

    I386 = 3  # Intel 80386.
    S390 = 22  # IBM System/390.
    Arm = 40  # 32-bit ARM.
    X8664 = 62  # AMD x86-64.
    AArc64 = 183  # 64-bit ARM (AArch64).
37
38
class ELFFile:
    """
    Representation of an ELF executable.

    The ELF header is parsed eagerly by the constructor; program headers are
    only read when ``interpreter`` is accessed, so the file object must stay
    open and seekable for as long as that property is used.

    Attributes set by the constructor:

    * ``capacity`` -- identification byte 4 (1 for 32-bit, 2 for 64-bit).
    * ``encoding`` -- identification byte 5 (1 for LSB, 2 for MSB).
    * ``machine`` -- architecture type read from the header.
    * ``flags`` -- processor-specific flags read from the header.
    """

    # Program header type marking the entry that holds the interpreter path.
    _PT_INTERP = 3

    def __init__(self, f: IO[bytes]) -> None:
        """
        Parse the ELF identification bytes and header from ``f``.

        :param f: Binary file object; reading starts at its current position.
        :raises ELFInvalid: If the identification or header is truncated, the
            magic bytes are wrong, or the capacity/encoding pair is unknown.
        """
        self._f = f

        try:
            ident = self._read("16B")
        except struct.error as e:
            raise ELFInvalid("unable to parse identification") from e
        magic = bytes(ident[:4])
        if magic != b"\x7fELF":
            raise ELFInvalid(f"invalid magic: {magic!r}")

        self.capacity = ident[4]  # File class (bitness).
        self.encoding = ident[5]  # Data structure encoding (endianness).

        try:
            # e_fmt: Format for the rest of the ELF header (after the ident).
            # p_fmt: Format for one program header entry.
            # p_idx: Indexes to find p_type, p_offset, and p_filesz.
            e_fmt, self._p_fmt, self._p_idx = {
                (1, 1): ("<HHIIIIIHHH", "<IIIIIIII", (0, 1, 4)),  # 32-bit LSB.
                (1, 2): (">HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)),  # 32-bit MSB.
                (2, 1): ("<HHIQQQIHHH", "<IIQQQQQQ", (0, 2, 5)),  # 64-bit LSB.
                (2, 2): (">HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)),  # 64-bit MSB.
            }[(self.capacity, self.encoding)]
        except KeyError as e:
            raise ELFInvalid(
                f"unrecognized capacity ({self.capacity}) or "
                f"encoding ({self.encoding})"
            ) from e

        try:
            (
                _,
                self.machine,  # Architecture type.
                _,
                _,
                self._e_phoff,  # Offset of the program header table.
                _,
                self.flags,  # Processor-specific flags.
                _,
                self._e_phentsize,  # Size of one program header entry.
                self._e_phnum,  # Number of program header entries.
            ) = self._read(e_fmt)
        except struct.error as e:
            raise ELFInvalid("unable to parse machine and section information") from e

    def _read(self, fmt: str) -> Tuple[int, ...]:
        """
        Read and unpack one ``struct`` record from the current file position.

        :param fmt: ``struct`` format string describing the record.
        :raises struct.error: If fewer bytes are available than ``fmt`` needs.
        """
        return struct.unpack(fmt, self._f.read(struct.calcsize(fmt)))

    @property
    def interpreter(self) -> Optional[str]:
        """
        The path recorded in the ``PT_INTERP`` program header.

        ``None`` is returned when no readable program header entry has that
        type. Accessing the property moves the file position.
        """
        type_idx, offset_idx, filesz_idx = self._p_idx
        for index in range(self._e_phnum):
            self._f.seek(self._e_phoff + self._e_phentsize * index)
            try:
                entry = self._read(self._p_fmt)
            except struct.error:
                # Best effort: an unreadable entry is skipped, not fatal.
                continue
            if entry[type_idx] != self._PT_INTERP:
                continue
            self._f.seek(entry[offset_idx])
            # The stored path is NUL-terminated; drop the terminator.
            return os.fsdecode(self._f.read(entry[filesz_idx])).strip("\0")
        return None
109