#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
r"""This file contains an ELF parser and ELF header structures.

Example usage:
    import elf_parser
    with elf_parser.ElfParser(file) as e:
        print('\n'.join(e.ListGlobalDynamicSymbols()))
        print('\n'.join(e.ListDependencies()))
"""

import ctypes
import os
import struct

from vts.utils.python.library.elf import consts
from vts.utils.python.library.elf import structs
from vts.utils.python.library.elf import utils


class ElfError(Exception):
    """The exception raised by ElfParser."""
    pass


class ElfParser(object):
    """The class reads information from an ELF file.

    Only little-endian (ELFDATA2LSB) 32-bit and 64-bit ELF objects are
    accepted; anything else is rejected by the constructor.

    Attributes:
        _file: The ELF file object.
        _begin_offset: The offset of the ELF object in the file. The value is
                       non-zero if the ELF is in an archive, such as .a file.
        _file_size: Size of the file.
        bitness: Bitness of the ELF.
        Ehdr: An Elf_Ehdr, the ELF header structure of the file.
        Shdr: A list of Elf_Shdr, the section headers of the file.
        Elf_Addr: ELF unsigned program address type.
        Elf_Off: ELF unsigned file offset type.
        Elf_Half: ELF unsigned medium integer type.
        Elf_Word: ELF unsigned integer type.
        Elf_Sword: ELF signed integer type.
        Elf_Ehdr: ELF header class.
        Elf_Shdr: ELF section header class.
        Elf_Dyn: ELF dynamic entry class.
        Elf_Sym: ELF symbol entry class.
        Elf_Rel: ELF relocation entry class.
        Elf_Rela: ELF relocation entry class with explicit addend.
        Elf_Phdr: ELF program header class.
        Elf_Nhdr: ELF note header class.
    """

    def __init__(self, file_path, begin_offset=0):
        """Creates a parser to open and read an ELF file.

        Args:
            file_path: The path to the file.
            begin_offset: The offset of the ELF object in the file.

        Raises:
            ElfError: File is not a valid ELF.
        """
        self._begin_offset = begin_offset
        try:
            self._file = open(file_path, 'rb')
        except IOError as e:
            raise ElfError(e)
        try:
            self._file_size = os.fstat(self._file.fileno()).st_size
        except OSError as e:
            self.Close()
            raise ElfError(e)

        try:
            e_ident = self._SeekRead(0, consts.EI_NIDENT)

            if e_ident[:4] != consts.ELF_MAGIC_NUMBER:
                raise ElfError('Unexpected magic bytes: {}'.format(e_ident[:4]))

            # Indexing a bytearray yields an integer on both Python 2 and
            # Python 3, whereas ord(bytes[i]) raises TypeError on Python 3.
            ident_values = bytearray(e_ident)
            ei_class = ident_values[consts.EI_CLASS]
            ei_data = ident_values[consts.EI_DATA]

            if ei_class not in (consts.ELFCLASS32, consts.ELFCLASS64):
                raise ElfError('Unexpected file class: {}'
                               .format(e_ident[consts.EI_CLASS]))

            if ei_data != consts.ELFDATA2LSB:
                raise ElfError('Unexpected data encoding: {}'
                               .format(e_ident[consts.EI_DATA]))

            if ei_class == consts.ELFCLASS32:
                self.bitness = 32
                self.Elf_Addr = structs.Elf32_Addr
                self.Elf_Off = structs.Elf32_Off
                self.Elf_Half = structs.Elf32_Half
                self.Elf_Word = structs.Elf32_Word
                self.Elf_Sword = structs.Elf32_Sword
                self.Elf_Ehdr = structs.Elf32_Ehdr
                self.Elf_Shdr = structs.Elf32_Shdr
                self.Elf_Dyn = structs.Elf32_Dyn
                self.Elf_Sym = structs.Elf32_Sym
                self.Elf_Rel = structs.Elf32_Rel
                self.Elf_Rela = structs.Elf32_Rela
                self.Elf_Phdr = structs.Elf32_Phdr
                self.Elf_Nhdr = structs.Elf32_Nhdr
            else:
                self.bitness = 64
                self.Elf_Addr = structs.Elf64_Addr
                self.Elf_Off = structs.Elf64_Off
                self.Elf_Half = structs.Elf64_Half
                self.Elf_Word = structs.Elf64_Word
                self.Elf_Sword = structs.Elf64_Sword
                self.Elf_Ehdr = structs.Elf64_Ehdr
                self.Elf_Shdr = structs.Elf64_Shdr
                self.Elf_Dyn = structs.Elf64_Dyn
                self.Elf_Sym = structs.Elf64_Sym
                self.Elf_Rel = structs.Elf64_Rel
                self.Elf_Rela = structs.Elf64_Rela
                self.Elf_Phdr = structs.Elf64_Phdr
                self.Elf_Nhdr = structs.Elf64_Nhdr

            self.Ehdr = self._SeekReadStruct(0, self.Elf_Ehdr)
            shoff = self.Ehdr.e_shoff
            shentsize = self.Ehdr.e_shentsize
            self.Shdr = [self._SeekReadStruct(shoff + i * shentsize,
                                              self.Elf_Shdr)
                         for i in range(self.Ehdr.e_shnum)]
        except Exception:
            # Release the file handle no matter what went wrong while parsing,
            # then let the original exception propagate unchanged.
            self.Close()
            raise

    def __del__(self):
        """Closes the ELF file."""
        self.Close()

    def __enter__(self):
        """Returns the parser itself so it can be used in a with statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Closes the ELF file."""
        self.Close()

    def Close(self):
        """Closes the ELF file."""
        # _file is missing when open() failed inside __init__.
        if hasattr(self, "_file"):
            self._file.close()

    @staticmethod
    def _GetEntryCount(sh):
        """Computes the number of fixed-size entries in a section.

        Args:
            sh: A section header.

        Returns:
            An integer, sh_size divided by sh_entsize, rounded down.

        Raises:
            ElfError: The section declares an entry size of zero.
        """
        if sh.sh_entsize == 0:
            raise ElfError("Section entry size is zero.")
        return int(sh.sh_size // sh.sh_entsize)

    def _SeekRead(self, offset, read_size):
        """Reads a byte string at specific offset in the file.

        Args:
            offset: An integer, the offset from the beginning of the ELF.
            read_size: An integer, number of bytes to read.

        Returns:
            A byte string which is the file content.

        Raises:
            ElfError: Fails to seek and read.
        """
        # The ELF object may start in the middle of the file (archive member),
        # so the absolute position is what must stay within the file size.
        file_offset = self._begin_offset + offset
        if file_offset + read_size > self._file_size:
            raise ElfError("Read beyond end of file.")
        try:
            self._file.seek(file_offset)
            data = self._file.read(read_size)
        except IOError as e:
            raise ElfError(e)
        if len(data) != read_size:
            raise ElfError("Read beyond end of file.")
        return data

    # The parser only accepts ELFDATA2LSB files, so the integer readers below
    # decode little-endian explicitly instead of using the host byte order.

    def _SeekRead8(self, offset):
        """Reads an 1-byte integer from file."""
        return struct.unpack("<B", self._SeekRead(offset, 1))[0]

    def _SeekRead16(self, offset):
        """Reads a 2-byte integer from file."""
        return struct.unpack("<H", self._SeekRead(offset, 2))[0]

    def _SeekRead32(self, offset):
        """Reads a 4-byte integer from file."""
        return struct.unpack("<I", self._SeekRead(offset, 4))[0]

    def _SeekRead64(self, offset):
        """Reads an 8-byte integer from file."""
        return struct.unpack("<Q", self._SeekRead(offset, 8))[0]

    def _SeekReadString(self, offset):
        """Reads a null-terminated string starting from specific offset.

        Args:
            offset: The offset from the beginning of the ELF object.

        Returns:
            A str, excluding the null character. On Python 2 this is the raw
            byte string; on Python 3 the bytes are decoded as UTF-8 and
            undecodable bytes are replaced.

        Raises:
            ElfError: String reaches end of file without null terminator.
        """
        chunks = []
        buf_size = 16
        try:
            self._file.seek(self._begin_offset + offset)
        except IOError as e:
            raise ElfError(e)
        while True:
            try:
                buf = self._file.read(buf_size)
            except IOError as e:
                raise ElfError(e)
            end_index = buf.find(b'\0')
            if end_index >= 0:
                chunks.append(buf[:end_index])
                break
            if len(buf) != buf_size:
                # A short read without a terminator means end of file.
                raise ElfError("Null-terminated string reaches end of file.")
            chunks.append(buf)
        ret = b''.join(chunks)
        if not isinstance(ret, str):
            # Python 3: callers compare names against str literals.
            ret = ret.decode('utf-8', 'replace')
        return ret

    def _SeekReadStruct(self, offset, struct_type):
        """Reads a ctypes.Structure / ctypes.Union from file.

        Args:
            offset: An integer, the offset from the beginning of the ELF.
            struct_type: A class, the structure type to read.

        Returns:
            An object of struct_type.

        Raises:
            ElfError: Fails to seek and read.
                      Fails to create struct_type instance.
        """
        raw_bytes = self._SeekRead(offset, ctypes.sizeof(struct_type))
        try:
            return struct_type.from_buffer_copy(raw_bytes)
        except ValueError as e:
            raise ElfError(e)

    def GetString(self, strtab, offset):
        """Retrieves a null-terminated string from string table.

        Args:
            strtab: Section header of the string table.
            offset: Section offset (string index) to start reading from.

        Returns:
            A string without the null terminator.

        Raises:
            ElfError: Fails to seek and read.
        """
        return self._SeekReadString(strtab.sh_offset + offset)

    def GetSectionName(self, sh):
        """Returns a section name.

        Args:
            sh: A section header.

        Returns:
            A String.

        Raises:
            ElfError: Fails to seek and read.
                      The section name string table index is out of range.
        """
        shstrndx = self.Ehdr.e_shstrndx
        if shstrndx >= len(self.Shdr):
            raise ElfError("Section name string table index is out of range.")
        return self.GetString(self.Shdr[shstrndx], sh.sh_name)

    def GetSectionsByName(self, name):
        """Returns a generator of section headers from a given name.

        If multiple sections have the same name, yield them all.

        Args:
            name: The section name to search for.

        Returns:
            A generator of Elf_Shdr.

        Raises:
            ElfError: Fails to seek and read.
        """
        return (sh for sh in self.Shdr if name == self.GetSectionName(sh))

    def GetSectionByName(self, name):
        """Returns a section header whose name equals a given name.

        Returns only the first match, assuming the section name is unique.

        Args:
            name: The section name to search for.

        Returns:
            An Elf_Shdr if found.
            None if no sections have the given name.

        Raises:
            ElfError: Fails to seek and read.
        """
        return next(self.GetSectionsByName(name), None)

    def GetDynamic(self, dynamic):
        """Yields the _DYNAMIC array.

        Iteration stops after the first DT_NULL entry, which is itself
        yielded.

        Args:
            dynamic: Section header of the dynamic section.

        Yields:
            Elf_Dyn.

        Raises:
            ElfError: Fails to seek and read.
                      The section declares an entry size of zero.
        """
        off = dynamic.sh_offset
        for _ in range(self._GetEntryCount(dynamic)):
            dyn = self._SeekReadStruct(off, self.Elf_Dyn)
            yield dyn
            if dyn.d_tag == consts.DT_NULL:
                break
            off += dynamic.sh_entsize

    def GetSymbol(self, symtab, idx):
        """Retrieves a Elf_Sym entry from symbol table.

        Args:
            symtab: A symbol table.
            idx: An integer, symbol table index.

        Returns:
            An Elf_Sym.

        Raises:
            ElfError: Fails to seek and read.
        """
        off = symtab.sh_offset + idx * symtab.sh_entsize
        return self._SeekReadStruct(off, self.Elf_Sym)

    def GetSymbols(self, symtab):
        """Returns a generator of Elf_Sym in symbol table.

        Args:
            symtab: A symbol table.

        Returns:
            A generator of Elf_Sym.

        Raises:
            ElfError: Fails to seek and read.
                      The section declares an entry size of zero.
        """
        num = self._GetEntryCount(symtab)
        return (self.GetSymbol(symtab, i) for i in range(num))

    def GetRelocationSymbol(self, symtab, rel):
        """Retrieves the Elf_Sym with respect to an Elf_Rel / Elf_Rela.

        Args:
            symtab: A symbol table.
            rel: A Elf_Rel or Elf_Rela.

        Returns:
            An Elf_Sym.

        Raises:
            ElfError: Fails to seek and read.
        """
        return self.GetSymbol(symtab, rel.GetSymbol())

    def _CreateElfRel(self, offset, info):
        """Creates an instance of Elf_Rel.

        Args:
            offset: The initial value of r_offset.
            info: The initial value of r_info.

        Returns:
            An Elf_Rel.
        """
        elf_rel = self.Elf_Rel()
        elf_rel.r_offset = offset
        elf_rel.r_info = info
        return elf_rel

    def _DecodeAndroidRelr(self, rel):
        """Decodes a SHT_RELR / SHT_ANDROID_RELR section.

        Args:
            rel: A relocation table.

        Yields:
            Elf_Rel.

        Raises:
            ElfError: Fails to seek and read.
                      The section declares an entry size of zero.
        """
        if rel.sh_entsize == 0:
            # A zero step would make range() raise ValueError below.
            raise ElfError("Section entry size is zero.")

        if self.bitness == 32:
            addr_size = 4
            seek_read_entry = self._SeekRead32
        else:
            addr_size = 8
            seek_read_entry = self._SeekRead64

        rel_offset = 0
        for ent_offset in range(rel.sh_offset, rel.sh_offset + rel.sh_size,
                                rel.sh_entsize):
            relr_entry = seek_read_entry(ent_offset)
            if (relr_entry & 1) == 0:
                # The entry is an address.
                yield self._CreateElfRel(relr_entry, 0)
                rel_offset = relr_entry + addr_size
            else:
                # The entry is a bitmap. Bit 0 is the marker; every other bit
                # stands for one address-sized word after the last address.
                for bit_idx in range(1, rel.sh_entsize * 8):
                    if (relr_entry >> bit_idx) & 1:
                        yield self._CreateElfRel(rel_offset, 0)
                    rel_offset += addr_size

    def GetRelocation(self, rel, idx):
        """Retrieves a Elf_Rel / Elf_Rela entry from relocation table.

        Args:
            rel: A relocation table.
            idx: An integer, relocation table index.

        Returns:
            An Elf_Rel or Elf_Rela.

        Raises:
            ElfError: Fails to seek and read.
        """
        off = rel.sh_offset + idx * rel.sh_entsize
        if rel.sh_type == consts.SHT_RELA:
            return self._SeekReadStruct(off, self.Elf_Rela)
        return self._SeekReadStruct(off, self.Elf_Rel)

    def GetRelocations(self, rel):
        """Returns a generator of Elf_Rel / Elf_Rela in relocation table.

        Args:
            rel: A relocation table.

        Returns:
            A generator of Elf_Rel or Elf_Rela.

        Raises:
            ElfError: Fails to seek and read.
                      The section declares an entry size of zero.
        """
        if rel.sh_type in (consts.SHT_ANDROID_REL, consts.SHT_ANDROID_RELA):
            relocations = self._UnpackAndroidRela(rel)
            if rel.sh_type == consts.SHT_ANDROID_REL:
                # The packed decoder always produces Elf_Rela; drop the addend.
                return (self.Elf_Rel(r_offset=rela.r_offset, r_info=rela.r_info)
                        for rela in relocations)
            return relocations
        elif rel.sh_type in (consts.SHT_RELR, consts.SHT_ANDROID_RELR):
            return self._DecodeAndroidRelr(rel)
        else:
            num = self._GetEntryCount(rel)
            return (self.GetRelocation(rel, i) for i in range(num))

    def _UnpackAndroidRela(self, android_rela):
        """Unpacks a SHT_ANDROID_REL / SHT_ANDROID_RELA section.

        Args:
            android_rela: The packed section's section header.

        Yields:
            Elf_Rela.

        Raises:
            ElfError: Fails to decode android rela section.
        """
        data = self._SeekRead(android_rela.sh_offset, android_rela.sh_size)
        # Check packed section header. The literal must be bytes because the
        # file is opened in binary mode.
        if len(data) < 4 or data[:4] != b'APS2':
            raise ElfError('Unexpected SHT_ANDROID_RELA header: {}'
                           .format(data[:4]))
        # Decode SLEB128 word stream.
        def _PackedWordsGen():
            cur = 4
            while cur < len(data):
                try:
                    value, num = utils.DecodeSLEB128(data, cur)
                except IndexError:
                    raise ElfError('Decoding pass end of section.')
                yield value
                cur += num
            raise ElfError('Decoding pass end of section.')

        _packed_words_gen = _PackedWordsGen()
        _PopWord = lambda: next(_packed_words_gen)
        # Decode delta encoded relocation data.
        current_count = 0
        total_count = _PopWord()
        offset = _PopWord()
        addend = 0
        while current_count < total_count:
            # Read relocation group info.
            group_size = _PopWord()
            group_flags = _PopWord()
            group_offset_delta = 0
            # Read group flag and prepare delta values.
            grouped_by_info = (
                group_flags & consts.RELOCATION_GROUPED_BY_INFO_FLAG)
            grouped_by_offset_delta = (
                group_flags & consts.RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG)
            grouped_by_addend = (
                group_flags & consts.RELOCATION_GROUPED_BY_ADDEND_FLAG)
            group_has_addend = (
                group_flags & consts.RELOCATION_GROUP_HAS_ADDEND_FLAG)
            if grouped_by_offset_delta:
                group_offset_delta = _PopWord()
            if grouped_by_info:
                info = _PopWord()
            if group_has_addend and grouped_by_addend:
                addend += _PopWord()
            if not group_has_addend:
                addend = 0
            # Handle each relocation entry in group.
            for _ in range(group_size):
                if grouped_by_offset_delta:
                    offset += group_offset_delta
                else:
                    offset += _PopWord()
                if not grouped_by_info:
                    info = _PopWord()
                if group_has_addend and not grouped_by_addend:
                    addend += _PopWord()

                relocation = self.Elf_Rela(r_offset=offset,
                                           r_info=info,
                                           r_addend=addend)
                yield relocation
            current_count += group_size

    def _LoadDtNeeded(self, dynamic):
        """Reads DT_NEEDED entries from dynamic section.

        Args:
            dynamic: Section header of the dynamic section.

        Returns:
            A list of strings, the names of libraries.

        Raises:
            ElfError: Fails to find dynamic string table.
        """
        strtab_addr = None
        name_offsets = []
        for dyn in self.GetDynamic(dynamic):
            if dyn.d_tag == consts.DT_NEEDED:
                name_offsets.append(dyn.d_un.d_val)
            elif dyn.d_tag == consts.DT_STRTAB:
                strtab_addr = dyn.d_un.d_ptr

        if strtab_addr is None:
            raise ElfError("Cannot find string table address in dynamic"
                           " section.")
        # DT_STRTAB holds an address, so map it back to the section that is
        # loaded at that address to obtain a file offset.
        try:
            strtab = next(sh for sh in self.Shdr if sh.sh_addr == strtab_addr)
        except StopIteration:
            raise ElfError("Cannot find dynamic string table.")
        return [self.GetString(strtab, off) for off in name_offsets]

    def IsExecutable(self):
        """Returns whether the ELF is executable."""
        return self.Ehdr.e_type == consts.ET_EXEC

    def IsSharedObject(self):
        """Returns whether the ELF is a shared object."""
        return self.Ehdr.e_type == consts.ET_DYN

    def HasAndroidIdent(self):
        """Returns whether the ELF has a .note.android.ident section."""
        for sh in self.GetSectionsByName(".note.android.ident"):
            nh = self._SeekReadStruct(sh.sh_offset, self.Elf_Nhdr)
            # The note name immediately follows the note header.
            name = self._SeekRead(sh.sh_offset + ctypes.sizeof(self.Elf_Nhdr),
                                  nh.n_namesz)
            if name == b"Android\0":
                return True
        return False

    def MatchCpuAbi(self, abi):
        """Returns whether the ELF matches the ABI.

        Args:
            abi: A string, the name of the ABI.

        Returns:
            A boolean, whether the ELF matches the ABI.
        """
        # Longer prefixes come first so that "arm64" is not taken for "arm".
        for abi_prefix, machine in (("arm64", consts.EM_AARCH64),
                                    ("arm", consts.EM_ARM),
                                    ("mips64", consts.EM_MIPS),
                                    ("mips", consts.EM_MIPS),
                                    ("x86_64", consts.EM_X86_64),
                                    ("x86", consts.EM_386)):
            if abi.startswith(abi_prefix):
                return self.Ehdr.e_machine == machine
        return False

    def ListDependencies(self):
        """Lists the shared libraries that the ELF depends on.

        Returns:
            A list of strings, the names of the depended libraries.

        Raises:
            ElfError: Fails to read a dynamic section or its string table.
        """
        deps = []
        for sh in self.Shdr:
            if sh.sh_type == consts.SHT_DYNAMIC:
                deps.extend(self._LoadDtNeeded(sh))
        return deps

    def ListGlobalSymbols(self, include_weak=False,
                          symtab_name=consts.SYMTAB,
                          strtab_name=consts.STRTAB):
        """Lists the global symbols defined in the ELF.

        Args:
            include_weak: A boolean, whether to include weak symbols.
            symtab_name: A string, the name of the symbol table.
            strtab_name: A string, the name of the string table.

        Returns:
            A list of strings, the names of the symbols.

        Raises:
            ElfError: Fails to find symbol table.
        """
        symtab = self.GetSectionByName(symtab_name)
        strtab = self.GetSectionByName(strtab_name)
        if not symtab or not strtab or symtab.sh_size == 0:
            raise ElfError("Cannot find symbol table.")

        include_bindings = [consts.STB_GLOBAL]
        if include_weak:
            include_bindings.append(consts.STB_WEAK)

        sym_names = []
        for sym in self.GetSymbols(symtab):
            # Global symbols can be defined at most once at link time,
            # while weak symbols may have multiple definitions.
            if sym.GetType() == consts.STT_NOTYPE:
                continue
            if sym.GetBinding() not in include_bindings:
                continue
            # Undefined symbols are imports, not definitions.
            if sym.st_shndx == consts.SHN_UNDEF:
                continue
            sym_names.append(self.GetString(strtab, sym.st_name))
        return sym_names

    def ListGlobalDynamicSymbols(self, include_weak=False):
        """Lists the global dynamic symbols defined in the ELF.

        Args:
            include_weak: A boolean, whether to include weak symbols.

        Returns:
            A list of strings, the names of the symbols.

        Raises:
            ElfError: Fails to find symbol table.
        """
        return self.ListGlobalSymbols(include_weak,
                                      consts.DYNSYM, consts.DYNSTR)

    def GetProgramInterpreter(self):
        """Gets the path to the program interpreter of the ELF.

        Returns:
            A string, the contents of the first PT_INTERP segment.
            None if the ELF has no PT_INTERP program header.

        Raises:
            ElfError: Fails to seek and read.
        """
        for ph_index in range(self.Ehdr.e_phnum):
            ph = self._SeekReadStruct(
                self.Ehdr.e_phoff + ph_index * self.Ehdr.e_phentsize,
                self.Elf_Phdr)
            if ph.p_type == consts.PT_INTERP:
                return self._SeekReadString(ph.p_offset)
        return None