#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Reads data from ELF sections.

This module provides tools for dumping the contents of an ELF section. It can
also be used to read values at a particular address. A command line interface
for both of these features is provided.

This module supports any ELF-format file, including .o and .so files. This
module also has basic support for archive (.a) files. All ELF files in an
archive are read as one unit.
"""

import argparse
from pathlib import Path
import re
import struct
import sys
from typing import BinaryIO, Dict, Iterable, NamedTuple, Optional
from typing import Pattern, Tuple, Union

ARCHIVE_MAGIC = b'!<arch>\n'
ELF_MAGIC = b'\x7fELF'


def _check_next_bytes(fd: BinaryIO, expected: bytes, what: str) -> None:
    """Consumes len(expected) bytes from fd and verifies they equal expected.

    Args:
      fd: binary file object positioned at the bytes to check
      expected: the bytes that must come next in the file
      what: description of the bytes, used in the error message

    Raises:
      FileDecodeError: the bytes read differ from expected
    """
    actual = fd.read(len(expected))
    if expected != actual:
        raise FileDecodeError(
            f'Invalid {what}: expected {expected!r}, found {actual!r} in file '
            f'{getattr(fd, "name", "(unknown)")}')


def files_in_archive(fd: BinaryIO) -> Iterable[int]:
    """Seeks to each file in an archive and yields its size.

    When a size is yielded, fd is positioned at the first byte of that file's
    contents. The caller may read from fd between iterations; the position is
    restored before advancing to the next file.

    Raises:
      FileDecodeError: the archive magic number, a file size, or a file
          header ending is invalid
    """

    _check_next_bytes(fd, ARCHIVE_MAGIC, 'archive magic number')

    while True:
        # In some archives, the first file ends with an additional \n. If that
        # is present, skip it.
        if fd.read(1) != b'\n':
            fd.seek(-1, 1)

        # Each file in an archive is prefixed with an ASCII header:
        #
        #   16 B - file identifier (text)
        #   12 B - file modification timestamp (decimal)
        #    6 B - owner ID (decimal)
        #    6 B - group ID (decimal)
        #    8 B - file mode (octal)
        #   10 B - file size in bytes (decimal)
        #    2 B - ending characters (`\n)
        #
        # Skip the unused portions of the file header, then read the size.
        fd.seek(16 + 12 + 6 + 6 + 8, 1)
        size_str = fd.read(10)
        if not size_str:
            return  # Reached the end of the archive.

        try:
            size = int(size_str, 10)
        except ValueError as exc:
            raise FileDecodeError(
                'Archive file sizes must be decimal integers') from exc

        _check_next_bytes(fd, b'`\n', 'archive file header ending')
        offset = fd.tell()  # Store offset in case the caller reads the file.

        yield size

        fd.seek(offset + size)


def _elf_files_in_archive(fd: BinaryIO) -> Iterable[None]:
    """Yields once for each ELF file found at or after fd's position.

    If fd is already positioned at an ELF file, yields exactly once. Otherwise
    fd is treated as an archive and this yields once per archive member that
    starts with the ELF magic number. At each yield, fd is positioned at the
    start of the ELF file.
    """
    if _bytes_match(fd, ELF_MAGIC):
        yield  # The value isn't used, so just yield None.
    else:
        for _ in files_in_archive(fd):
            if _bytes_match(fd, ELF_MAGIC):
                yield


class Field(NamedTuple):
    """A field in an ELF file.

    Fields refer to a particular piece of data in an ELF file or section header.
    """

    name: str
    offset_32: int  # Offset of the field when the ELF class byte is 1
    offset_64: int  # Offset of the field when the ELF class byte is 2
    size_32: int  # Size in bytes when the ELF class byte is 1
    size_64: int  # Size in bytes when the ELF class byte is 2


class _FileHeader(NamedTuple):
    """Fields in the ELF file header."""

    section_header_offset: Field = Field('e_shoff', 0x20, 0x28, 4, 8)
    section_count: Field = Field('e_shnum', 0x30, 0x3C, 2, 2)
    section_names_index: Field = Field('e_shstrndx', 0x32, 0x3E, 2, 2)


FILE_HEADER = _FileHeader()


class _SectionHeader(NamedTuple):
    """Fields in an ELF section header."""

    section_name_offset: Field = Field('sh_name', 0x00, 0x00, 4, 4)
    section_address: Field = Field('sh_addr', 0x0C, 0x10, 4, 8)
    section_offset: Field = Field('sh_offset', 0x10, 0x18, 4, 8)
    section_size: Field = Field('sh_size', 0x14, 0x20, 4, 8)

    # section_header_end records the size of the header.
    section_header_end: Field = Field('section end', 0x28, 0x40, 0, 0)


SECTION_HEADER = _SectionHeader()


def read_c_string(fd: BinaryIO) -> bytes:
    """Reads a null-terminated string from the provided file descriptor.

    Reading stops at the first null byte or at the end of the file. The null
    terminator is consumed but not included in the result.
    """
    string = bytearray()
    while True:
        byte = fd.read(1)
        if not byte or byte == b'\0':
            return bytes(string)
        string += byte


def _bytes_match(fd: BinaryIO, expected: bytes) -> bool:
    """Peeks at the next bytes to see if they match the expected.

    The file position is restored after reading. Returns False if an IOError
    occurs while accessing the file.
    """
    try:
        offset = fd.tell()
        data = fd.read(len(expected))
        fd.seek(offset)
        return data == expected
    except IOError:
        return False


def compatible_file(file: Union[BinaryIO, str, Path]) -> bool:
    """True if the file type is supported (ELF or archive).

    Args:
      file: a path to open, or an already open binary file object. An open
          file object is left open with its position restored.

    Raises:
      OSError: file is a path that could not be opened
    """
    if isinstance(file, (str, Path)):
        # Open the path in a with block so the file is always closed and a
        # failure to open propagates as the original OSError.
        with open(file, 'rb') as fd:
            return compatible_file(fd)

    offset = file.tell()
    file.seek(0)
    result = _bytes_match(file, ELF_MAGIC) or _bytes_match(file, ARCHIVE_MAGIC)
    file.seek(offset)
    return result


class FileDecodeError(Exception):
    """Invalid data was read from an ELF file."""


class FieldReader:
    """Reads ELF fields defined with a Field tuple from an ELF file."""
    def __init__(self, elf: BinaryIO):
        """Reads the ELF identification bytes at the current position of elf.

        Raises:
          FileDecodeError: the ELF magic number, address size byte, or
              endianness byte is invalid
        """
        self._elf = elf
        self.file_offset = self._elf.tell()

        _check_next_bytes(self._elf, ELF_MAGIC, 'ELF file header')
        size_field = self._elf.read(1)  # e_ident[EI_CLASS] (address size)

        # The endianness byte immediately follows the address size byte, so
        # this call must come directly after reading size_field.
        int_unpacker = self._determine_integer_format()

        if size_field == b'\x01':
            self.offset = lambda field: field.offset_32
            self._size = lambda field: field.size_32
            self._decode = lambda f, d: int_unpacker[f.size_32].unpack(d)[0]
        elif size_field == b'\x02':
            self.offset = lambda field: field.offset_64
            self._size = lambda field: field.size_64
            self._decode = lambda f, d: int_unpacker[f.size_64].unpack(d)[0]
        else:
            raise FileDecodeError(f'Unknown size {size_field!r}')

    def _determine_integer_format(self) -> Dict[int, struct.Struct]:
        """Returns a dict of structs used for converting bytes to integers.

        The dict is keyed by integer size in bytes. Reads one byte from the
        file to determine the byte order.
        """
        endianness_byte = self._elf.read(1)  # e_ident[EI_DATA] (endianness)
        if endianness_byte == b'\x01':
            endianness = '<'
        elif endianness_byte == b'\x02':
            endianness = '>'
        else:
            raise FileDecodeError(f'Unknown endianness {endianness_byte!r}')

        return {
            1: struct.Struct(endianness + 'B'),
            2: struct.Struct(endianness + 'H'),
            4: struct.Struct(endianness + 'I'),
            8: struct.Struct(endianness + 'Q'),
        }

    def read(self, field: Field, base: int = 0) -> int:
        """Reads field as an unsigned integer.

        Args:
          field: the field to read
          base: offset of the structure that contains the field, relative to
              the start of this ELF file
        """
        self._elf.seek(self.file_offset + base + self.offset(field))
        data = self._elf.read(self._size(field))
        return self._decode(field, data)

    def read_string(self, offset: int) -> str:
        """Reads a null-terminated string at offset from the ELF file start."""
        self._elf.seek(self.file_offset + offset)
        return read_c_string(self._elf).decode()


class Elf:
    """Represents an ELF file and the sections in it."""
    class Section(NamedTuple):
        """Info about a section in an ELF file."""
        name: str
        address: int
        offset: int
        size: int

        file_offset: int  # Starting place in the file; 0 unless in an archive.

        def range(self) -> range:
            """Returns the range of addresses covered by this section."""
            return range(self.address, self.address + self.size)

        def __lt__(self, other) -> bool:
            # Order sections by address only, rather than by tuple contents.
            return self.address < other.address

    def __init__(self, elf: BinaryIO):
        """Reads the section headers of all ELF files in the open file elf."""
        self._elf = elf
        self.sections: Tuple[Elf.Section, ...] = tuple(self._list_sections())

    def _list_sections(self) -> Iterable['Elf.Section']:
        """Reads the section headers to enumerate all ELF sections."""
        for _ in _elf_files_in_archive(self._elf):
            reader = FieldReader(self._elf)
            base = reader.read(FILE_HEADER.section_header_offset)
            section_header_size = reader.offset(
                SECTION_HEADER.section_header_end)

            # Find the section with the section names in it.
            names_section_header_base = (
                base + section_header_size *
                reader.read(FILE_HEADER.section_names_index))
            names_table_base = reader.read(SECTION_HEADER.section_offset,
                                           names_section_header_base)

            # base still refers to the first section header; step through the
            # headers one at a time.
            for _ in range(reader.read(FILE_HEADER.section_count)):
                name_offset = reader.read(SECTION_HEADER.section_name_offset,
                                          base)

                yield self.Section(
                    reader.read_string(names_table_base + name_offset),
                    reader.read(SECTION_HEADER.section_address, base),
                    reader.read(SECTION_HEADER.section_offset, base),
                    reader.read(SECTION_HEADER.section_size, base),
                    reader.file_offset)

                base += section_header_size

    def section_by_address(self, address: int) -> Optional['Elf.Section']:
        """Returns the section that contains the provided address, if any."""
        # Iterate in reverse to give priority to sections with nonzero addresses
        for section in sorted(self.sections, reverse=True):
            if address in section.range():
                return section

        return None

    def sections_with_name(self, name: str) -> Iterable['Elf.Section']:
        """Yields every section whose name is exactly name."""
        for section in self.sections:
            if section.name == name:
                yield section

    def read_value(self,
                   address: int,
                   size: Optional[int] = None) -> Union[None, bytes, int]:
        """Reads specified bytes or null-terminated string at address.

        Args:
          address: the address to read from
          size: number of bytes to read; if None, reads up to a null
              terminator

        Returns:
          The bytes read, or None if no section contains the address.
        """
        section = self.section_by_address(address)
        if not section:
            return None

        assert section.address <= address
        self._elf.seek(section.file_offset + section.offset + address -
                       section.address)

        if size is None:
            return read_c_string(self._elf)

        return self._elf.read(size)

    def dump_sections(self, name: Union[str,
                                        Pattern[str]]) -> Dict[str, bytes]:
        """Dumps a binary string containing the sections matching the regex.

        Returns a dict that maps section names to section contents. If several
        matching sections share a name, the last one read is kept.
        """
        name_regex = re.compile(name)

        sections: Dict[str, bytes] = {}
        for section in self.sections:
            if name_regex.match(section.name):
                self._elf.seek(section.file_offset + section.offset)
                sections[section.name] = self._elf.read(section.size)

        return sections

    def dump_section_contents(
            self, name: Union[str, Pattern[str]]) -> Optional[bytes]:
        """Returns the joined contents of matching sections, or None if none."""
        sections = self.dump_sections(name)
        return b''.join(sections.values()) if sections else None

    def summary(self) -> str:
        """Returns one line per section: index, address, offset, size, name."""
        return '\n'.join(
            '[{0:2}] {1.address:08x} {1.offset:08x} {1.size:08x} {1.name}'.
            format(i, section) for i, section in enumerate(self.sections))

    def __str__(self) -> str:
        return 'Elf({}\n)'.format(''.join('\n {},'.format(s)
                                          for s in self.sections))


def _read_addresses(elf, size: Optional[int], output,
                    address: Iterable[int]) -> None:
    """Reads a value at each address and passes it to output.

    Raises:
      ValueError: no section contains one of the addresses
    """
    for addr in address:
        value = elf.read_value(addr, size)

        if value is None:
            raise ValueError(f'Invalid address 0x{addr:08x}')

        output(value)


def _dump_sections(elf: Elf, output, sections: Iterable[Pattern[str]]) -> None:
    """Outputs the contents of matching sections, or a summary if none given."""
    if not sections:
        output(elf.summary().encode())
        return

    for section_pattern in sections:
        output(elf.dump_section_contents(section_pattern))


def _parse_args() -> argparse.Namespace:
    """Parses and returns command line arguments."""
    parser = argparse.ArgumentParser(description=__doc__)

    def hex_int(arg):
        return int(arg, 16)

    parser.add_argument('-e',
                        '--elf',
                        type=argparse.FileType('rb'),
                        help='the ELF file to examine',
                        required=True)

    parser.add_argument(
        '-d',
        '--delimiter',
        default=ord('\n'),
        type=int,
        help=r'delimiter to write after each value; \n by default')

    # Print the help text if no subcommand is selected.
    parser.set_defaults(handler=lambda **_: parser.print_help())

    subparsers = parser.add_subparsers(
        help='select whether to work with addresses or whole sections')

    section_parser = subparsers.add_parser('section')
    section_parser.set_defaults(handler=_dump_sections)
    section_parser.add_argument(
        'sections',
        metavar='section_regex',
        nargs='*',
        type=re.compile,  # type: ignore
        help='section name regular expression')

    address_parser = subparsers.add_parser('address')
    address_parser.set_defaults(handler=_read_addresses)
    address_parser.add_argument(
        '--size',
        type=int,
        help='the size to read; reads until a null terminator by default')
    address_parser.add_argument('address',
                                nargs='+',
                                type=hex_int,
                                help='hexadecimal addresses to read')

    return parser.parse_args()


def _main(args):
    """Calls the appropriate handler for the command line options."""
    # Remove the options consumed here so that the remaining attributes can be
    # passed to the handler as keyword arguments.
    handler = args.handler
    del args.handler

    delim = args.delimiter
    del args.delimiter

    def output(value):
        # A value of None writes only the delimiter.
        if value is not None:
            sys.stdout.buffer.write(value)
        sys.stdout.buffer.write(bytearray([delim]))
        sys.stdout.flush()

    args.output = output
    args.elf = Elf(args.elf)

    handler(**vars(args))


if __name__ == '__main__':
    _main(_parse_args())