#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Reads data from ELF sections.

This module provides tools for dumping the contents of an ELF section. It can
also be used to read values at a particular address. A command line interface
for both of these features is provided.

This module supports any ELF-format file, including .o and .so files. This
module also has basic support for archive (.a) files. All ELF files in an
archive are read as one unit.
"""

import argparse
import collections
from pathlib import Path
import re
import struct
import sys
from typing import (
    BinaryIO,
    Iterable,
    Mapping,
    NamedTuple,
    Optional,
    Pattern,
    Tuple,
    Union,
)

ARCHIVE_MAGIC = b'!<arch>\n'
ELF_MAGIC = b'\x7fELF'


def _check_next_bytes(fd: BinaryIO, expected: bytes, what: str) -> None:
    """Consumes len(expected) bytes from fd and checks that they match.

    Args:
      fd: binary stream to read from; its position advances past the bytes
      expected: the exact bytes that must come next in the stream
      what: description of the bytes, used in the error message

    Raises:
      FileDecodeError: the bytes read differ from expected
    """
    actual = fd.read(len(expected))
    if expected != actual:
        raise FileDecodeError(
            f'Invalid {what}: expected {expected!r}, found {actual!r} in file '
            f'{getattr(fd, "name", "(unknown)")}'
        )


def files_in_archive(fd: BinaryIO) -> Iterable[int]:
    """Seeks to each file in an archive and yields its size.

    When a size is yielded, fd is positioned at the first byte of that file's
    contents. The caller may read from fd between iterations; the position is
    reset to the end of the current file before the next header is parsed.

    Raises:
      FileDecodeError: the archive magic number, a file size, or a header
          ending is invalid
    """

    _check_next_bytes(fd, ARCHIVE_MAGIC, 'archive magic number')

    while True:
        # In some archives, the first file ends with an additional \n. If that
        # is present, skip it. Only rewind when a byte was actually consumed;
        # at the end of the file there is nothing to put back.
        padding = fd.read(1)
        if padding and padding != b'\n':
            fd.seek(-1, 1)

        # Each file in an archive is prefixed with an ASCII header:
        #
        #   16 B - file identifier (text)
        #   12 B - file modification timestamp (decimal)
        #    6 B - owner ID (decimal)
        #    6 B - group ID (decimal)
        #    8 B - file mode (octal)
        #   10 B - file size in bytes (decimal)
        #    2 B - ending characters (`\n)
        #
        # Skip the unused portions of the file header, then read the size.
        fd.seek(16 + 12 + 6 + 6 + 8, 1)
        size_str = fd.read(10)
        if not size_str:
            return  # Reached the end of the archive.

        try:
            size = int(size_str, 10)
        except ValueError as exc:
            raise FileDecodeError(
                'Archive file sizes must be decimal integers'
            ) from exc

        _check_next_bytes(fd, b'`\n', 'archive file header ending')
        offset = fd.tell()  # Store offset in case the caller reads the file.

        yield size

        fd.seek(offset + size)


def _elf_files_in_archive(fd: BinaryIO):
    """Yields once for each ELF file found at the current position of fd.

    If fd starts with the ELF magic number, it is treated as a single ELF file.
    Otherwise it is parsed as an archive, and only the members that start with
    the ELF magic number are yielded. At each yield, fd is positioned at the
    start of an ELF file.
    """
    if _bytes_match(fd, ELF_MAGIC):
        yield  # The value isn't used, so just yield None.
    else:
        for _ in files_in_archive(fd):
            if _bytes_match(fd, ELF_MAGIC):
                yield


class Field(NamedTuple):
    """A field in an ELF file.

    Fields refer to a particular piece of data in an ELF file or section header.
    Each field stores its offset and size for both 32-bit and 64-bit ELF files.
    """

    name: str
    offset_32: int
    offset_64: int
    size_32: int
    size_64: int


class _FileHeader(NamedTuple):
    """Fields in the ELF file header."""

    section_header_offset: Field = Field('e_shoff', 0x20, 0x28, 4, 8)
    section_count: Field = Field('e_shnum', 0x30, 0x3C, 2, 2)
    section_names_index: Field = Field('e_shstrndx', 0x32, 0x3E, 2, 2)


FILE_HEADER = _FileHeader()


class _SectionHeader(NamedTuple):
    """Fields in an ELF section header."""

    section_name_offset: Field = Field('sh_name', 0x00, 0x00, 4, 4)
    section_address: Field = Field('sh_addr', 0x0C, 0x10, 4, 8)
    section_offset: Field = Field('sh_offset', 0x10, 0x18, 4, 8)
    section_size: Field = Field('sh_size', 0x14, 0x20, 4, 8)

    # section_header_end records the size of the header.
    section_header_end: Field = Field('section end', 0x28, 0x40, 0, 0)


SECTION_HEADER = _SectionHeader()


def read_c_string(fd: BinaryIO) -> bytes:
    """Reads a null-terminated string from the provided file descriptor.

    Reading stops at the first null byte or at the end of the file, whichever
    comes first. The terminator is consumed but is not part of the result.
    """
    string = bytearray()
    while True:
        byte = fd.read(1)
        if not byte or byte == b'\0':
            return bytes(string)
        string += byte


def _bytes_match(fd: BinaryIO, expected: bytes) -> bool:
    """Peeks at the next bytes to see if they match the expected.

    The stream position is restored after reading. Returns False if the stream
    raises IOError while being inspected.
    """
    try:
        offset = fd.tell()
        data = fd.read(len(expected))
        fd.seek(offset)
        return data == expected
    except IOError:
        return False


def compatible_file(file: Union[BinaryIO, str, Path]) -> bool:
    """True if the file type is supported (ELF or archive).

    Args:
      file: a path to open, or an already-open binary stream. A path is opened
          and closed by this function. A stream is left open, with its
          position restored to where it was on entry.

    Raises:
      OSError: a path was provided and it could not be opened
    """
    if isinstance(file, (str, Path)):
        # Open outside of any cleanup logic so that a failure to open the file
        # surfaces as the original OSError.
        with open(file, 'rb') as fd:
            return compatible_file(fd)

    offset = file.tell()
    try:
        file.seek(0)
        return _bytes_match(file, ELF_MAGIC) or _bytes_match(
            file, ARCHIVE_MAGIC
        )
    finally:
        file.seek(offset)


class FileDecodeError(Exception):
    """Invalid data was read from an ELF file."""


class FieldReader:
    """Reads ELF fields defined with a Field tuple from an ELF file."""

    def __init__(self, elf: BinaryIO):
        """Parses the start of the ELF header to select sizes and endianness.

        Args:
          elf: binary stream positioned at the start of an ELF file; that
              position is stored as file_offset and all reads are relative
              to it

        Raises:
          FileDecodeError: the ELF magic number, address size, or endianness
              is not recognized
        """
        self._elf = elf
        self.file_offset = self._elf.tell()

        _check_next_bytes(self._elf, ELF_MAGIC, 'ELF file header')
        size_field = self._elf.read(1)  # e_ident[EI_CLASS] (address size)

        # This reads the byte that follows EI_CLASS, so it must come directly
        # after the read above.
        int_unpacker = self._determine_integer_format()

        if size_field == b'\x01':
            self.offset = lambda field: field.offset_32
            self._size = lambda field: field.size_32
        elif size_field == b'\x02':
            self.offset = lambda field: field.offset_64
            self._size = lambda field: field.size_64
        else:
            raise FileDecodeError(f'Unknown size {size_field!r}')

        self._decode = lambda f, d: int_unpacker[self._size(f)].unpack(d)[0]

    def _determine_integer_format(self) -> Mapping[int, struct.Struct]:
        """Returns a dict of structs used for converting bytes to integers.

        Reads one byte (e_ident[EI_DATA]) from the current stream position. The
        returned dict maps an integer size in bytes to a struct.Struct that
        unpacks an unsigned integer of that size with the file's endianness.

        Raises:
          FileDecodeError: the endianness byte is not 1 or 2
        """
        endianness_byte = self._elf.read(1)  # e_ident[EI_DATA] (endianness)
        if endianness_byte == b'\x01':
            endianness = '<'
        elif endianness_byte == b'\x02':
            endianness = '>'
        else:
            raise FileDecodeError(f'Unknown endianness {endianness_byte!r}')

        return {
            1: struct.Struct(endianness + 'B'),
            2: struct.Struct(endianness + 'H'),
            4: struct.Struct(endianness + 'I'),
            8: struct.Struct(endianness + 'Q'),
        }

    def read(self, field: Field, base: int = 0) -> int:
        """Reads the integer value of a field.

        Args:
          field: the field to read
          base: offset of the structure that contains the field, relative to
              the start of the ELF file

        Raises:
          FileDecodeError: the file ends before the whole field could be read
        """
        size = self._size(field)
        self._elf.seek(self.file_offset + base + self.offset(field))
        data = self._elf.read(size)

        # A short read means the file is truncated or an offset is bogus.
        # Report it as a decode error rather than leaking a struct.error.
        if len(data) != size:
            raise FileDecodeError(
                f'Failed to read {field.name}: expected {size} bytes, '
                f'found {len(data)}'
            )

        return self._decode(field, data)

    def read_string(self, offset: int) -> str:
        """Reads a null-terminated string at offset within the ELF file."""
        self._elf.seek(self.file_offset + offset)
        return read_c_string(self._elf).decode()


class Elf:
    """Represents an ELF file and the sections in it."""

    class Section(NamedTuple):
        """Info about a section in an ELF file."""

        name: str
        address: int
        offset: int
        size: int

        file_offset: int  # Starting place in the file; 0 unless in an archive.

        def range(self) -> range:
            """Returns the range of addresses covered by this section."""
            return range(self.address, self.address + self.size)

        def __lt__(self, other) -> bool:
            # Order sections by address only, instead of comparing every field.
            return self.address < other.address

    def __init__(self, elf: BinaryIO):
        """Reads the section headers from an ELF file or archive.

        Args:
          elf: binary stream positioned at the start of an ELF file or archive;
              it is kept and used for later reads, so it must remain open

        Raises:
          FileDecodeError: the file is not a valid ELF file or archive
        """
        self._elf = elf
        self.sections: Tuple[Elf.Section, ...] = tuple(self._list_sections())

    def _list_sections(self) -> Iterable['Elf.Section']:
        """Reads the section headers to enumerate all ELF sections."""
        for _ in _elf_files_in_archive(self._elf):
            reader = FieldReader(self._elf)
            headers_base = reader.read(FILE_HEADER.section_header_offset)
            header_size = reader.offset(SECTION_HEADER.section_header_end)

            # Find the section with the section names in it.
            names_section_header_base = headers_base + header_size * (
                reader.read(FILE_HEADER.section_names_index)
            )
            names_table_base = reader.read(
                SECTION_HEADER.section_offset, names_section_header_base
            )

            section_count = reader.read(FILE_HEADER.section_count)
            for index in range(section_count):
                base = headers_base + index * header_size
                name_offset = reader.read(
                    SECTION_HEADER.section_name_offset, base
                )

                yield self.Section(
                    reader.read_string(names_table_base + name_offset),
                    reader.read(SECTION_HEADER.section_address, base),
                    reader.read(SECTION_HEADER.section_offset, base),
                    reader.read(SECTION_HEADER.section_size, base),
                    reader.file_offset,
                )

    def section_by_address(self, address: int) -> Optional['Elf.Section']:
        """Returns the section that contains the provided address, if any."""
        # Iterate in reverse to give priority to sections with nonzero addresses
        for section in sorted(self.sections, reverse=True):
            if address in section.range():
                return section

        return None

    def sections_with_name(self, name: str) -> Iterable['Elf.Section']:
        """Yields each section whose name is exactly equal to name."""
        for section in self.sections:
            if section.name == name:
                yield section

    def read_value(
        self, address: int, size: Optional[int] = None
    ) -> Union[None, bytes, int]:
        """Reads specified bytes or null-terminated string at address.

        Args:
          address: the address to read from
          size: number of bytes to read; if None, reads up to a null terminator

        Returns:
          The bytes read, or None if no section contains the address.
        """
        section = self.section_by_address(address)
        if not section:
            return None

        assert section.address <= address
        self._elf.seek(
            section.file_offset + section.offset + address - section.address
        )

        if size is None:
            return read_c_string(self._elf)

        return self._elf.read(size)

    def dump_sections(
        self, name: Union[str, Pattern[str]]
    ) -> Mapping[str, bytes]:
        """Returns a mapping of section names to section contents.

        The name is a regular expression (string or compiled) that is matched
        against the start of each section name.

        If processing an archive with multiple object files, the contents of
        sections with duplicate names are concatenated in the order they appear
        in the archive.
        """
        name_regex = re.compile(name)

        sections: Mapping[str, bytearray] = collections.defaultdict(bytearray)
        for section in self.sections:
            if name_regex.match(section.name):
                self._elf.seek(section.file_offset + section.offset)
                sections[section.name].extend(self._elf.read(section.size))

        return sections

    def dump_section_contents(
        self, name: Union[str, Pattern[str]]
    ) -> Optional[bytes]:
        """Dumps a binary string containing the sections matching the regex.

        If processing an archive with multiple object files, the contents of
        sections with duplicate names are concatenated in the order they appear
        in the archive.

        Returns:
          The concatenated contents, or None if no section name matches.
        """
        sections = self.dump_sections(name)
        return b''.join(sections.values()) if sections else None

    def summary(self) -> str:
        """Returns one line per section with its index, address, offset, size,
        and name."""
        return '\n'.join(
            '[{0:2}] {1.address:08x} {1.offset:08x} {1.size:08x} '
            '{1.name}'.format(i, section)
            for i, section in enumerate(self.sections)
        )

    def __str__(self) -> str:
        return 'Elf({}\n)'.format(
            ''.join('\n  {},'.format(s) for s in self.sections)
        )


def _read_addresses(
    elf, size: Optional[int], output, address: Iterable[int]
) -> None:
    """Command line handler that outputs the value at each address.

    Raises:
      ValueError: an address is not contained in any section
    """
    for addr in address:
        value = elf.read_value(addr, size)

        if value is None:
            raise ValueError('Invalid address 0x{:08x}'.format(addr))

        output(value)


def _dump_sections(elf: Elf, output, sections: Iterable[Pattern[str]]) -> None:
    """Command line handler that outputs the contents of matching sections.

    If no section patterns are provided, a summary of all sections is output.
    """
    if not sections:
        output(elf.summary().encode())
        return

    for section_pattern in sections:
        output(elf.dump_section_contents(section_pattern))


def _parse_args() -> argparse.Namespace:
    """Parses and returns command line arguments."""
    parser = argparse.ArgumentParser(description=__doc__)

    def hex_int(arg):
        return int(arg, 16)

    parser.add_argument(
        '-e',
        '--elf',
        type=argparse.FileType('rb'),
        help='the ELF file to examine',
        required=True,
    )

    parser.add_argument(
        '-d',
        '--delimiter',
        default=ord('\n'),
        type=int,
        help=r'delimiter to write after each value; \n by default',
    )

    # Print the help text if no subcommand is selected.
    parser.set_defaults(handler=lambda **_: parser.print_help())

    subparsers = parser.add_subparsers(
        help='select whether to work with addresses or whole sections'
    )

    section_parser = subparsers.add_parser('section')
    section_parser.set_defaults(handler=_dump_sections)
    section_parser.add_argument(
        'sections',
        metavar='section_regex',
        nargs='*',
        type=re.compile,  # type: ignore
        help='section name regular expression',
    )

    address_parser = subparsers.add_parser('address')
    address_parser.set_defaults(handler=_read_addresses)
    address_parser.add_argument(
        '--size',
        type=int,
        help='the size to read; reads until a null terminator by default',
    )
    address_parser.add_argument(
        'address', nargs='+', type=hex_int, help='hexadecimal addresses to read'
    )

    return parser.parse_args()


def _main(args):
    """Calls the appropriate handler for the command line options."""
    # The handler and delimiter are consumed here; every remaining argument is
    # forwarded to the handler as a keyword argument.
    handler = args.handler
    del args.handler

    delim = args.delimiter
    del args.delimiter

    def output(value):
        if value is not None:
            sys.stdout.buffer.write(value)
            sys.stdout.buffer.write(bytearray([delim]))
            sys.stdout.flush()

    args.output = output
    args.elf = Elf(args.elf)

    handler(**vars(args))


if __name__ == '__main__':
    _main(_parse_args())