• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Copyright 2020 The Pigweed Authors
3#
4# Licensed under the Apache License, Version 2.0 (the "License"); you may not
5# use this file except in compliance with the License. You may obtain a copy of
6# the License at
7#
8#     https://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13# License for the specific language governing permissions and limitations under
14# the License.
15"""Reads data from ELF sections.
16
17This module provides tools for dumping the contents of an ELF section. It can
18also be used to read values at a particular address. A command line interface
19for both of these features is provided.
20
21This module supports any ELF-format file, including .o and .so files. This
22module also has basic support for archive (.a) files. All ELF files in an
23archive are read as one unit.
24"""
25
26import argparse
27import collections
28from pathlib import Path
29import re
30import struct
31import sys
32from typing import (
33    BinaryIO,
34    Iterable,
35    Mapping,
36    NamedTuple,
37    Optional,
38    Pattern,
39    Tuple,
40    Union,
41)
42
# Bytes expected at the very start of an archive (.a) file.
ARCHIVE_MAGIC = b'!<arch>\n'
# Bytes expected at the very start of an ELF file.
ELF_MAGIC = b'\x7fELF'
45
46
def _check_next_bytes(fd: BinaryIO, expected: bytes, what: str) -> None:
    """Consumes the next bytes from fd and verifies that they equal expected.

    Args:
      fd: binary file object positioned where `expected` should start; it is
          advanced past the bytes that are read
      expected: the exact bytes that must come next
      what: description of the expected bytes, used in the error message

    Raises:
      FileDecodeError: the bytes read differ from `expected`, which includes
          the case where fewer bytes than expected were available
    """
    actual = fd.read(len(expected))
    if actual != expected:
        # File objects without a name attribute (e.g. io.BytesIO) are reported
        # as "(unknown)".
        raise FileDecodeError(
            f'Invalid {what}: expected {expected!r}, found {actual!r} in file '
            f'{getattr(fd, "name", "(unknown)")}'
        )
54
55
def files_in_archive(fd: BinaryIO) -> Iterable[int]:
    """Seeks to each file in an archive and yields its size.

    Each time a size is yielded, fd is positioned at the first byte of that
    file's contents. The caller may read from or seek within fd between
    iterations; the position is restored before moving to the next file.

    Args:
      fd: binary file object positioned at the start of an archive

    Yields:
      The size in bytes of each file in the archive, in archive order.

    Raises:
      FileDecodeError: the archive magic number or a file header ending is
          missing, or a file size is not a non-negative decimal integer
    """

    _check_next_bytes(fd, ARCHIVE_MAGIC, 'archive magic number')

    while True:
        # A file's contents may be followed by a single \n padding byte. If
        # that is present, skip it. Only step back when a byte was actually
        # read, so that reaching the end of the file does not rewind.
        padding = fd.read(1)
        if padding and padding != b'\n':
            fd.seek(-1, 1)

        # Each file in an archive is prefixed with an ASCII header:
        #
        #   16 B - file identifier (text)
        #   12 B - file modification timestamp (decimal)
        #    6 B - owner ID (decimal)
        #    6 B - group ID (decimal)
        #    8 B - file mode (octal)
        #   10 B - file size in bytes (decimal)
        #    2 B - ending characters (`\n)
        #
        # Skip the unused portions of the file header, then read the size.
        fd.seek(16 + 12 + 6 + 6 + 8, 1)
        size_str = fd.read(10)
        if not size_str:
            return  # Reached the end of the archive.

        try:
            size = int(size_str, 10)
        except ValueError as exc:
            raise FileDecodeError(
                'Archive file sizes must be decimal integers'
            ) from exc

        # A negative size would make the seek below move backwards, which
        # could revisit the same header forever on a malformed archive.
        if size < 0:
            raise FileDecodeError(
                f'Archive file sizes must not be negative; found {size}'
            )

        _check_next_bytes(fd, b'`\n', 'archive file header ending')
        offset = fd.tell()  # Store offset in case the caller reads the file.

        yield size

        fd.seek(offset + size)
96
97
def _elf_files_in_archive(fd: BinaryIO):
    """Yields once for each ELF file reachable from fd's current position.

    If fd is positioned at an ELF file, yields a single time. Otherwise fd is
    read as an archive and a value is yielded for every archive member that
    starts with the ELF magic number. On each yield, fd is positioned at the
    start of an ELF file. The yielded value is always None.
    """
    if not _bytes_match(fd, ELF_MAGIC):
        # Not a bare ELF file, so walk the archive members instead.
        for _ in files_in_archive(fd):
            if _bytes_match(fd, ELF_MAGIC):
                yield None
        return

    yield None
105
106
class Field(NamedTuple):
    """Describes where one value lives in an ELF file or section header.

    A field has a separate offset and size for 32-bit and 64-bit ELF files.
    """

    # Name of the field, for reference.
    name: str
    # Byte offset of the field in 32-bit and 64-bit files, respectively.
    offset_32: int
    offset_64: int
    # Size of the field in bytes in 32-bit and 64-bit files, respectively.
    size_32: int
    size_64: int
118
119
class _FileHeader(NamedTuple):
    """The ELF file header fields that this module reads.

    Every member defaults to the Field describing its location, so an instance
    created with no arguments serves as the table of file header fields.
    """

    # Where the section header table starts within the ELF file.
    section_header_offset: Field = Field('e_shoff', 0x20, 0x28, 4, 8)
    # How many section headers the table contains.
    section_count: Field = Field('e_shnum', 0x30, 0x3C, 2, 2)
    # Index of the section header for the section that stores section names.
    section_names_index: Field = Field('e_shstrndx', 0x32, 0x3E, 2, 2)


# Shared instance holding the default field definitions.
FILE_HEADER = _FileHeader()
129
130
class _SectionHeader(NamedTuple):
    """The ELF section header fields that this module reads.

    Offsets are relative to the start of an individual section header. Every
    member defaults to the Field describing its location.
    """

    # Offset of the section's name within the section names table.
    section_name_offset: Field = Field('sh_name', 0x00, 0x00, 4, 4)
    # Address of the section.
    section_address: Field = Field('sh_addr', 0x0C, 0x10, 4, 8)
    # Offset of the section's contents within the ELF file.
    section_offset: Field = Field('sh_offset', 0x10, 0x18, 4, 8)
    # Size of the section's contents in bytes.
    section_size: Field = Field('sh_size', 0x14, 0x20, 4, 8)

    # Not a real field: its offset is the total size of one section header, so
    # it marks where the next header begins. It has no size of its own.
    section_header_end: Field = Field('section end', 0x28, 0x40, 0, 0)


# Shared instance holding the default field definitions.
SECTION_HEADER = _SectionHeader()
144
145
def read_c_string(fd: BinaryIO) -> bytes:
    """Reads bytes from fd up to a null terminator or the end of the file.

    The terminator is consumed but is not included in the returned bytes.
    """
    pieces = []
    byte = fd.read(1)
    # Stop on either an empty read (end of file) or the null terminator.
    while byte and byte != b'\0':
        pieces.append(byte)
        byte = fd.read(1)
    return b''.join(pieces)
154
155
def _bytes_match(fd: BinaryIO, expected: bytes) -> bool:
    """Checks whether the upcoming bytes in fd equal expected.

    The file position is put back where it started after peeking. Returns
    False if the file raises IOError while being examined.
    """
    try:
        start = fd.tell()
        peeked = fd.read(len(expected))
        fd.seek(start)
    except IOError:
        return False

    return peeked == expected
165
166
def compatible_file(file: Union[BinaryIO, str, Path]) -> bool:
    """True if the file type is supported (ELF or archive).

    Args:
      file: a path to open, or an already-open binary file object. A file
          object is left open and its position is restored after the check.

    Returns:
      True if the file starts with the ELF or archive magic number.

    Raises:
      OSError: a path was provided and the file could not be opened
    """
    if isinstance(file, (str, Path)):
        # Open the path here so a failure to open propagates as-is, and the
        # file is always closed once the check completes.
        with open(file, 'rb') as fd:
            return compatible_file(fd)

    offset = file.tell()
    file.seek(0)
    result = _bytes_match(file, ELF_MAGIC) or _bytes_match(file, ARCHIVE_MAGIC)
    file.seek(offset)

    return result
181
182
class FileDecodeError(Exception):
    """Raised when the data read from an ELF or archive file is invalid."""
185
186
class FieldReader:
    """Reads ELF fields defined with a Field tuple from an ELF file.

    The reader must be created while the file is positioned at the start of an
    ELF file. That position is stored as file_offset, and all reads are made
    relative to it, so the ELF file may be embedded in a larger file.

    Attributes:
      file_offset: position in the underlying file at which the ELF file starts
    """

    def __init__(self, elf: BinaryIO):
        """Reads the ELF identification bytes to select sizes and endianness.

        Args:
          elf: binary file object positioned at the start of an ELF file

        Raises:
          FileDecodeError: the ELF magic number is missing, or the address
              size or endianness byte has an unrecognized value
        """
        self._elf = elf
        self.file_offset = self._elf.tell()

        _check_next_bytes(self._elf, ELF_MAGIC, 'ELF file header')
        size_field = self._elf.read(1)  # e_ident[EI_CLASS] (address size)

        # The endianness byte directly follows the address size byte, so it
        # has to be read before anything else moves the file position.
        self._int_unpacker = self._determine_integer_format()

        if size_field == b'\x01':
            self._is_64_bit = False
        elif size_field == b'\x02':
            self._is_64_bit = True
        else:
            raise FileDecodeError('Unknown size {!r}'.format(size_field))

    def _determine_integer_format(self) -> Mapping[int, struct.Struct]:
        """Returns a dict of structs used for converting bytes to integers.

        The dict maps an integer size in bytes to a struct that unpacks an
        unsigned integer of that size with the file's endianness.
        """
        endianness_byte = self._elf.read(1)  # e_ident[EI_DATA] (endianness)
        if endianness_byte == b'\x01':
            endianness = '<'
        elif endianness_byte == b'\x02':
            endianness = '>'
        else:
            raise FileDecodeError(
                'Unknown endianness {!r}'.format(endianness_byte)
            )

        return {
            1: struct.Struct(endianness + 'B'),
            2: struct.Struct(endianness + 'H'),
            4: struct.Struct(endianness + 'I'),
            8: struct.Struct(endianness + 'Q'),
        }

    def offset(self, field: Field) -> int:
        """Returns the field's offset for this file's address size."""
        return field.offset_64 if self._is_64_bit else field.offset_32

    def _size(self, field: Field) -> int:
        """Returns the field's size in bytes for this file's address size."""
        return field.size_64 if self._is_64_bit else field.size_32

    def _decode(self, field: Field, data: bytes) -> int:
        """Converts the raw bytes of a field to an unsigned integer."""
        return self._int_unpacker[self._size(field)].unpack(data)[0]

    def read(self, field: Field, base: int = 0) -> int:
        """Reads and decodes the integer value of a field.

        Args:
          field: the field to read
          base: offset from the start of the ELF file of the structure that
              contains the field (e.g. the start of a section header)

        Returns:
          The field's value as an unsigned integer.

        Raises:
          FileDecodeError: the file ends before the whole field could be read
        """
        self._elf.seek(self.file_offset + base + self.offset(field))
        size = self._size(field)
        data = self._elf.read(size)

        # Report truncated files explicitly rather than letting struct fail
        # with an error that does not mention which field was being read.
        if len(data) != size:
            raise FileDecodeError(
                f'Expected {size} bytes for {field.name}, but only '
                f'{len(data)} could be read'
            )

        return self._decode(field, data)

    def read_string(self, offset: int) -> str:
        """Reads the null-terminated string at an offset into the ELF file."""
        self._elf.seek(self.file_offset + offset)
        return read_c_string(self._elf).decode()
237
238
class Elf:
    """Represents an ELF file and the sections in it.

    The section headers are read when the object is created. If the provided
    file is an archive, the sections of every ELF file in the archive are
    collected together.

    Attributes:
      sections: tuple of Section objects, in the order they were read
    """

    class Section(NamedTuple):
        """Info about a section in an ELF file."""

        name: str
        address: int
        offset: int
        size: int

        file_offset: int  # Starting place in the file; 0 unless in an archive.

        def range(self) -> range:
            """Returns the range of addresses this section occupies."""
            return range(self.address, self.address + self.size)

        def __lt__(self, other) -> bool:
            """Orders sections by address alone."""
            return self.address < other.address

    def __init__(self, elf: BinaryIO):
        """Reads the section headers from the ELF file or archive.

        Args:
          elf: binary file object for an ELF file or archive; it is kept and
              read again later when section contents are requested
        """
        self._elf = elf
        self.sections: Tuple[Elf.Section, ...] = tuple(self._list_sections())

    def _list_sections(self) -> Iterable['Elf.Section']:
        """Reads the section headers to enumerate all ELF sections."""
        for _ in _elf_files_in_archive(self._elf):
            reader = FieldReader(self._elf)
            section_headers_base = reader.read(
                FILE_HEADER.section_header_offset
            )
            section_header_size = reader.offset(
                SECTION_HEADER.section_header_end
            )

            # Find the section with the section names in it.
            names_section_header_base = (
                section_headers_base
                + section_header_size
                * reader.read(FILE_HEADER.section_names_index)
            )
            names_table_base = reader.read(
                SECTION_HEADER.section_offset, names_section_header_base
            )

            # Section headers are stored back to back, so each header starts
            # one header size after the previous one.
            base = section_headers_base
            for _ in range(reader.read(FILE_HEADER.section_count)):
                name_offset = reader.read(
                    SECTION_HEADER.section_name_offset, base
                )

                yield self.Section(
                    reader.read_string(names_table_base + name_offset),
                    reader.read(SECTION_HEADER.section_address, base),
                    reader.read(SECTION_HEADER.section_offset, base),
                    reader.read(SECTION_HEADER.section_size, base),
                    reader.file_offset,
                )

                base += section_header_size

    def section_by_address(self, address: int) -> Optional['Elf.Section']:
        """Returns the section that contains the provided address, if any.

        If several sections contain the address, the one with the highest
        starting address is returned, which gives priority to sections with
        nonzero addresses. If those tie, the earliest in self.sections wins.
        """
        containing = (s for s in self.sections if address in s.range())
        # max() returns the first maximal item, which preserves the tie-break
        # described above without sorting every section on each call.
        return max(containing, key=lambda s: s.address, default=None)

    def sections_with_name(self, name: str) -> Iterable['Elf.Section']:
        """Yields each section whose name is exactly the provided name."""
        for section in self.sections:
            if section.name == name:
                yield section

    def read_value(
        self, address: int, size: Optional[int] = None
    ) -> Optional[bytes]:
        """Reads specified bytes or null-terminated string at address.

        Args:
          address: the address to read from
          size: number of bytes to read; if None, reads up to a null terminator

        Returns:
          The bytes that were read, or None if no section contains the address.
        """
        section = self.section_by_address(address)
        if not section:
            return None

        # The address is inside the section's range, so the distance from the
        # start of the section is never negative.
        self._elf.seek(
            section.file_offset + section.offset + address - section.address
        )

        if size is None:
            return read_c_string(self._elf)

        return self._elf.read(size)

    def dump_sections(
        self, name: Union[str, Pattern[str]]
    ) -> Mapping[str, bytes]:
        """Returns a mapping of section names to section contents.

        Sections are selected by matching the regular expression against the
        start of each section name.

        If processing an archive with multiple object files, the contents of
        sections with duplicate names are concatenated in the order they appear
        in the archive.
        """
        name_regex = re.compile(name)

        sections: Mapping[str, bytearray] = collections.defaultdict(bytearray)
        for section in self.sections:
            if name_regex.match(section.name):
                self._elf.seek(section.file_offset + section.offset)
                sections[section.name].extend(self._elf.read(section.size))

        # Return an ordinary dict of immutable bytes so that looking up a
        # missing name raises KeyError instead of silently adding an entry.
        return {
            section_name: bytes(contents)
            for section_name, contents in sections.items()
        }

    def dump_section_contents(
        self, name: Union[str, Pattern[str]]
    ) -> Optional[bytes]:
        """Dumps a binary string containing the sections matching the regex.

        If processing an archive with multiple object files, the contents of
        sections with duplicate names are concatenated in the order they appear
        in the archive.

        Returns None if no section matches.
        """
        sections = self.dump_sections(name)
        return b''.join(sections.values()) if sections else None

    def summary(self) -> str:
        """Returns one line per section: index, address, offset, size, name."""
        return '\n'.join(
            '[{0:2}] {1.address:08x} {1.offset:08x} {1.size:08x} '
            '{1.name}'.format(i, section)
            for i, section in enumerate(self.sections)
        )

    def __str__(self) -> str:
        return 'Elf({}\n)'.format(
            ''.join('\n  {},'.format(s) for s in self.sections)
        )
371
372
def _read_addresses(elf, size: int, output, address: Iterable[int]) -> None:
    """Reads the value at each address and passes it to output.

    Raises:
      ValueError: elf.read_value returned None for one of the addresses
    """
    for location in address:
        data = elf.read_value(location, size)
        if data is not None:
            output(data)
            continue

        raise ValueError('Invalid address 0x{:08x}'.format(location))
381
382
def _dump_sections(elf: Elf, output, sections: Iterable[Pattern[str]]) -> None:
    """Outputs the contents matching each pattern, or a summary if none.

    With no section patterns, the encoded summary of all sections is output
    instead.
    """
    if sections:
        for pattern in sections:
            output(elf.dump_section_contents(pattern))
    else:
        output(elf.summary().encode())
390
391
def _parse_args() -> argparse.Namespace:
    """Builds the command line parser and returns the parsed arguments."""

    def hex_int(arg):
        return int(arg, 16)

    parser = argparse.ArgumentParser(description=__doc__)

    # Options shared by every subcommand.
    parser.add_argument(
        '-e',
        '--elf',
        required=True,
        type=argparse.FileType('rb'),
        help='the ELF file to examine',
    )
    parser.add_argument(
        '-d',
        '--delimiter',
        type=int,
        default=ord('\n'),
        help=r'delimiter to write after each value; \n by default',
    )

    # Without a subcommand, the handler just prints the help text.
    parser.set_defaults(handler=lambda **_: parser.print_help())

    commands = parser.add_subparsers(
        help='select whether to work with addresses or whole sections'
    )

    # "section": dump the sections matching each regular expression.
    section_cmd = commands.add_parser('section')
    section_cmd.set_defaults(handler=_dump_sections)
    section_cmd.add_argument(
        'sections',
        nargs='*',
        metavar='section_regex',
        type=re.compile,  # type: ignore
        help='section name regular expression',
    )

    # "address": read the values stored at specific addresses.
    address_cmd = commands.add_parser('address')
    address_cmd.set_defaults(handler=_read_addresses)
    address_cmd.add_argument(
        '--size',
        type=int,
        help='the size to read; reads until a null terminator by default',
    )
    address_cmd.add_argument(
        'address', type=hex_int, nargs='+', help='hexadecimal addresses to read'
    )

    return parser.parse_args()
443
444
def _main(args):
    """Runs the handler selected on the command line.

    The handler and delimiter are removed from args; every remaining argument,
    plus an output function and the parsed Elf, is passed to the handler as a
    keyword argument.
    """
    handler, delim = args.handler, args.delimiter
    del args.handler, args.delimiter

    def output(value):
        # Handlers may produce None (e.g. no matching section); write nothing.
        if value is None:
            return

        stream = sys.stdout.buffer
        stream.write(value)
        stream.write(bytearray([delim]))
        sys.stdout.flush()

    args.output = output
    args.elf = Elf(args.elf)

    handler(**vars(args))


if __name__ == '__main__':
    _main(_parse_args())
467