1#!/usr/bin/env python3 2# 3# Copyright (C) 2019 The Android Open Source Project 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17"""ELF file checker. 18 19This command ensures all undefined symbols in an ELF file can be resolved to 20global (or weak) symbols defined in shared objects specified in DT_NEEDED 21entries. 22""" 23 24from __future__ import print_function 25 26import argparse 27import collections 28import os 29import os.path 30import re 31import struct 32import subprocess 33import sys 34 35 36_ELF_MAGIC = b'\x7fELF' 37 38 39# Known machines 40_EM_386 = 3 41_EM_ARM = 40 42_EM_X86_64 = 62 43_EM_AARCH64 = 183 44 45_32_BIT_MACHINES = {_EM_386, _EM_ARM} 46_64_BIT_MACHINES = {_EM_X86_64, _EM_AARCH64} 47_KNOWN_MACHINES = _32_BIT_MACHINES | _64_BIT_MACHINES 48 49# ELF header struct 50_ELF_HEADER_STRUCT = ( 51 ('ei_magic', '4s'), 52 ('ei_class', 'B'), 53 ('ei_data', 'B'), 54 ('ei_version', 'B'), 55 ('ei_osabi', 'B'), 56 ('ei_pad', '8s'), 57 ('e_type', 'H'), 58 ('e_machine', 'H'), 59 ('e_version', 'I'), 60) 61 62_ELF_HEADER_STRUCT_FMT = ''.join(_fmt for _, _fmt in _ELF_HEADER_STRUCT) 63 64 65ELFHeader = collections.namedtuple( 66 'ELFHeader', [_name for _name, _ in _ELF_HEADER_STRUCT]) 67 68 69ELF = collections.namedtuple( 70 'ELF', 71 ('alignments', 'dt_soname', 'dt_needed', 'imported', 'exported', 'header')) 72 73 74def _get_os_name(): 75 """Get the host OS name.""" 76 if sys.platform.startswith('linux'): 77 return 'linux' 78 if sys.platform.startswith('darwin'): 79 return 'darwin' 80 raise ValueError(sys.platform + ' is not supported') 81 82 83def _get_build_top(): 84 """Find the build top of the source tree ($ANDROID_BUILD_TOP).""" 85 prev_path = None 86 curr_path = os.path.abspath(os.getcwd()) 87 while prev_path != curr_path: 88 if os.path.exists(os.path.join(curr_path, '.repo')): 89 return curr_path 90 prev_path = curr_path 91 curr_path = os.path.dirname(curr_path) 92 return None 93 94 95def _select_latest_llvm_version(versions): 96 """Select the latest LLVM prebuilts version from a set of versions.""" 97 pattern = re.compile('clang-r([0-9]+)([a-z]?)') 98 found_rev = 0 99 found_ver = None 100 for curr_ver in versions: 101 match = pattern.match(curr_ver) 102 if not match: 103 continue 104 curr_rev = int(match.group(1)) 105 if not found_ver or curr_rev > found_rev or ( 106 curr_rev == found_rev and curr_ver > found_ver): 107 found_rev = curr_rev 108 found_ver = curr_ver 109 return found_ver 110 111 112def _get_latest_llvm_version(llvm_dir): 113 """Find the latest LLVM prebuilts version from `llvm_dir`.""" 114 return _select_latest_llvm_version(os.listdir(llvm_dir)) 115 116 117def _get_llvm_dir(): 118 """Find the path to LLVM prebuilts.""" 119 build_top = _get_build_top() 120 121 llvm_prebuilts_base = os.environ.get('LLVM_PREBUILTS_BASE') 122 if not llvm_prebuilts_base: 123 llvm_prebuilts_base = os.path.join('prebuilts', 'clang', 'host') 124 125 llvm_dir = os.path.join( 126 build_top, llvm_prebuilts_base, _get_os_name() + '-x86') 127 128 if not os.path.exists(llvm_dir): 129 return None 130 131 llvm_prebuilts_version = os.environ.get('LLVM_PREBUILTS_VERSION') 132 if not llvm_prebuilts_version: 133 llvm_prebuilts_version = _get_latest_llvm_version(llvm_dir) 134 135 llvm_dir = os.path.join(llvm_dir, llvm_prebuilts_version) 136 137 if not os.path.exists(llvm_dir): 138 return None 139 140 return llvm_dir 141 142 143def _get_llvm_readobj(): 144 """Find the path to llvm-readobj executable.""" 145 llvm_dir = _get_llvm_dir() 146 llvm_readobj = os.path.join(llvm_dir, 'bin', 'llvm-readobj') 147 return llvm_readobj if os.path.exists(llvm_readobj) else 'llvm-readobj' 148 149 150class ELFError(ValueError): 151 """Generic ELF parse error""" 152 pass 153 154 155class ELFInvalidMagicError(ELFError): 156 """Invalid ELF magic word error""" 157 def __init__(self): 158 super(ELFInvalidMagicError, self).__init__('bad ELF magic') 159 160 161class ELFParser(object): 162 """ELF file parser""" 163 164 @classmethod 165 def _read_elf_header(cls, elf_file_path): 166 """Read the ELF magic word from the beginning of the file.""" 167 with open(elf_file_path, 'rb') as elf_file: 168 buf = elf_file.read(struct.calcsize(_ELF_HEADER_STRUCT_FMT)) 169 try: 170 return ELFHeader(*struct.unpack(_ELF_HEADER_STRUCT_FMT, buf)) 171 except struct.error: 172 return None 173 174 175 @classmethod 176 def open(cls, elf_file_path, llvm_readobj): 177 """Open and parse the ELF file.""" 178 # Parse the ELF header to check the magic word. 179 header = cls._read_elf_header(elf_file_path) 180 if not header or header.ei_magic != _ELF_MAGIC: 181 raise ELFInvalidMagicError() 182 183 # Run llvm-readobj and parse the output. 184 return cls._read_llvm_readobj(elf_file_path, header, llvm_readobj) 185 186 187 @classmethod 188 def _find_prefix(cls, pattern, lines_it): 189 """Iterate `lines_it` until finding a string that starts with `pattern`.""" 190 for line in lines_it: 191 if line.startswith(pattern): 192 return True 193 return False 194 195 196 @classmethod 197 def _read_llvm_readobj(cls, elf_file_path, header, llvm_readobj): 198 """Run llvm-readobj and parse the output.""" 199 cmd = [llvm_readobj, '--program-headers', '--dynamic-table', 200 '--dyn-symbols', elf_file_path] 201 out = subprocess.check_output(cmd, text=True) 202 lines = out.splitlines() 203 return cls._parse_llvm_readobj(elf_file_path, header, lines) 204 205 206 @classmethod 207 def _parse_llvm_readobj(cls, elf_file_path, header, lines): 208 """Parse the output of llvm-readobj.""" 209 lines_it = iter(lines) 210 alignments = cls._parse_program_headers(lines_it) 211 dt_soname, dt_needed = cls._parse_dynamic_table(elf_file_path, lines_it) 212 imported, exported = cls._parse_dynamic_symbols(lines_it) 213 return ELF(alignments, dt_soname, dt_needed, imported, exported, header) 214 215 216 _PROGRAM_HEADERS_START_PATTERN = 'ProgramHeaders [' 217 _PROGRAM_HEADERS_END_PATTERN = ']' 218 _PROGRAM_HEADER_START_PATTERN = 'ProgramHeader {' 219 _PROGRAM_HEADER_TYPE_PATTERN = re.compile('^\\s+Type:\\s+(.*)$') 220 _PROGRAM_HEADER_ALIGN_PATTERN = re.compile('^\\s+Alignment:\\s+(.*)$') 221 _PROGRAM_HEADER_END_PATTERN = '}' 222 223 224 @classmethod 225 def _parse_program_headers(cls, lines_it): 226 """Parse the dynamic table section.""" 227 alignments = [] 228 229 if not cls._find_prefix(cls._PROGRAM_HEADERS_START_PATTERN, lines_it): 230 raise ELFError() 231 232 for line in lines_it: 233 # Parse each program header 234 if line.strip() == cls._PROGRAM_HEADER_START_PATTERN: 235 p_align = None 236 p_type = None 237 for line in lines_it: 238 if line.strip() == cls._PROGRAM_HEADER_END_PATTERN: 239 if not p_align: 240 raise ELFError("Could not parse alignment from program header!") 241 if not p_type: 242 raise ELFError("Could not parse type from program header!") 243 244 if p_type.startswith("PT_LOAD "): 245 alignments.append(int(p_align)) 246 break 247 248 match = cls._PROGRAM_HEADER_TYPE_PATTERN.match(line) 249 if match: 250 p_type = match.group(1) 251 252 match = cls._PROGRAM_HEADER_ALIGN_PATTERN.match(line) 253 if match: 254 p_align = match.group(1) 255 256 if line == cls._PROGRAM_HEADERS_END_PATTERN: 257 break 258 259 return alignments 260 261 262 _DYNAMIC_SECTION_START_PATTERN = 'DynamicSection [' 263 264 _DYNAMIC_SECTION_NEEDED_PATTERN = re.compile( 265 '^ 0x[0-9a-fA-F]+\\s+NEEDED\\s+Shared library: \\[(.*)\\]$') 266 267 _DYNAMIC_SECTION_SONAME_PATTERN = re.compile( 268 '^ 0x[0-9a-fA-F]+\\s+SONAME\\s+Library soname: \\[(.*)\\]$') 269 270 _DYNAMIC_SECTION_END_PATTERN = ']' 271 272 273 @classmethod 274 def _parse_dynamic_table(cls, elf_file_path, lines_it): 275 """Parse the dynamic table section.""" 276 dt_soname = os.path.basename(elf_file_path) 277 dt_needed = [] 278 279 dynamic = cls._find_prefix(cls._DYNAMIC_SECTION_START_PATTERN, lines_it) 280 if not dynamic: 281 return (dt_soname, dt_needed) 282 283 for line in lines_it: 284 if line == cls._DYNAMIC_SECTION_END_PATTERN: 285 break 286 287 match = cls._DYNAMIC_SECTION_NEEDED_PATTERN.match(line) 288 if match: 289 dt_needed.append(match.group(1)) 290 continue 291 292 match = cls._DYNAMIC_SECTION_SONAME_PATTERN.match(line) 293 if match: 294 dt_soname = match.group(1) 295 continue 296 297 return (dt_soname, dt_needed) 298 299 300 _DYNAMIC_SYMBOLS_START_PATTERN = 'DynamicSymbols [' 301 _DYNAMIC_SYMBOLS_END_PATTERN = ']' 302 303 _SYMBOL_ENTRY_START_PATTERN = ' Symbol {' 304 _SYMBOL_ENTRY_PATTERN = re.compile('^ ([A-Za-z0-9_]+): (.*)$') 305 _SYMBOL_ENTRY_PAREN_PATTERN = re.compile( 306 '\\s+\\((?:(?:\\d+)|(?:0x[0-9a-fA-F]+))\\)$') 307 _SYMBOL_ENTRY_END_PATTERN = ' }' 308 309 310 @staticmethod 311 def _parse_symbol_name(name_with_version): 312 """Split `name_with_version` into name and version. This function may split 313 at last occurrence of `@@` or `@`.""" 314 pos = name_with_version.rfind('@') 315 if pos == -1: 316 name = name_with_version 317 version = '' 318 else: 319 if pos > 0 and name_with_version[pos - 1] == '@': 320 name = name_with_version[0:pos - 1] 321 else: 322 name = name_with_version[0:pos] 323 version = name_with_version[pos + 1:] 324 return (name, version) 325 326 327 @classmethod 328 def _parse_dynamic_symbols(cls, lines_it): 329 """Parse dynamic symbol table and collect imported and exported symbols.""" 330 imported = collections.defaultdict(set) 331 exported = collections.defaultdict(set) 332 333 for symbol in cls._parse_dynamic_symbols_internal(lines_it): 334 name, version = cls._parse_symbol_name(symbol['Name']) 335 if name: 336 if symbol['Section'] == 'Undefined': 337 if symbol['Binding'] != 'Weak': 338 imported[name].add(version) 339 else: 340 if symbol['Binding'] != 'Local': 341 exported[name].add(version) 342 343 # Freeze the returned imported/exported dict. 344 return (dict(imported), dict(exported)) 345 346 347 @classmethod 348 def _parse_dynamic_symbols_internal(cls, lines_it): 349 """Parse symbols entries and yield each symbols.""" 350 351 if not cls._find_prefix(cls._DYNAMIC_SYMBOLS_START_PATTERN, lines_it): 352 return 353 354 for line in lines_it: 355 if line == cls._DYNAMIC_SYMBOLS_END_PATTERN: 356 return 357 358 if line == cls._SYMBOL_ENTRY_START_PATTERN: 359 symbol = {} 360 continue 361 362 if line == cls._SYMBOL_ENTRY_END_PATTERN: 363 yield symbol 364 symbol = None 365 continue 366 367 match = cls._SYMBOL_ENTRY_PATTERN.match(line) 368 if match: 369 key = match.group(1) 370 value = cls._SYMBOL_ENTRY_PAREN_PATTERN.sub('', match.group(2)) 371 symbol[key] = value 372 continue 373 374 375class Checker(object): 376 """ELF file checker that checks DT_SONAME, DT_NEEDED, and symbols.""" 377 378 def __init__(self, llvm_readobj): 379 self._file_path = '' 380 self._file_under_test = None 381 self._shared_libs = [] 382 383 self._llvm_readobj = llvm_readobj 384 385 386 if sys.stderr.isatty(): 387 _ERROR_TAG = '\033[0;1;31merror:\033[m' # Red error 388 _NOTE_TAG = '\033[0;1;30mnote:\033[m' # Black note 389 else: 390 _ERROR_TAG = 'error:' # Red error 391 _NOTE_TAG = 'note:' # Black note 392 393 394 def _error(self, *args): 395 """Emit an error to stderr.""" 396 print(self._file_path + ': ' + self._ERROR_TAG, *args, file=sys.stderr) 397 398 399 def _note(self, *args): 400 """Emit a note to stderr.""" 401 print(self._file_path + ': ' + self._NOTE_TAG, *args, file=sys.stderr) 402 403 404 def _load_elf_file(self, path, skip_bad_elf_magic): 405 """Load an ELF file from the `path`.""" 406 try: 407 return ELFParser.open(path, self._llvm_readobj) 408 except (IOError, OSError): 409 self._error('Failed to open "{}".'.format(path)) 410 sys.exit(2) 411 except ELFInvalidMagicError: 412 if skip_bad_elf_magic: 413 sys.exit(0) 414 else: 415 self._error('File "{}" must have a valid ELF magic word.'.format(path)) 416 sys.exit(2) 417 except: 418 self._error('An unknown error occurred while opening "{}".'.format(path)) 419 raise 420 421 422 def load_file_under_test(self, path, skip_bad_elf_magic, 423 skip_unknown_elf_machine): 424 """Load file-under-test (either an executable or a shared lib).""" 425 self._file_path = path 426 self._file_under_test = self._load_elf_file(path, skip_bad_elf_magic) 427 428 if skip_unknown_elf_machine and \ 429 self._file_under_test.header.e_machine not in _KNOWN_MACHINES: 430 sys.exit(0) 431 432 433 def load_shared_libs(self, shared_lib_paths): 434 """Load shared libraries.""" 435 for path in shared_lib_paths: 436 self._shared_libs.append(self._load_elf_file(path, False)) 437 438 439 def check_dt_soname(self, soname): 440 """Check whether DT_SONAME matches installation file name.""" 441 if self._file_under_test.dt_soname != soname: 442 self._error('DT_SONAME "{}" must be equal to the file name "{}".' 443 .format(self._file_under_test.dt_soname, soname)) 444 sys.exit(2) 445 446 447 def check_dt_needed(self, system_shared_lib_names): 448 """Check whether all DT_NEEDED entries are specified in the build 449 system.""" 450 451 missing_shared_libs = False 452 453 # Collect the DT_SONAMEs from shared libs specified in the build system. 454 specified_sonames = {lib.dt_soname for lib in self._shared_libs} 455 456 # Chech whether all DT_NEEDED entries are specified. 457 for lib in self._file_under_test.dt_needed: 458 if lib not in specified_sonames: 459 self._error(f'DT_NEEDED "{lib}" is not specified in shared_libs.') 460 missing_shared_libs = True 461 462 if missing_shared_libs: 463 dt_needed = sorted(set(self._file_under_test.dt_needed)) 464 modules = [re.sub('\\.so$', '', lib) for lib in dt_needed] 465 466 # Remove system shared libraries from the suggestion since they are added 467 # by default. 468 modules = [name for name in modules 469 if name not in system_shared_lib_names] 470 471 self._note() 472 self._note('Fix suggestions:') 473 self._note( 474 ' Android.bp: shared_libs: [' + 475 ', '.join('"' + module + '"' for module in modules) + '],') 476 self._note( 477 ' Android.mk: LOCAL_SHARED_LIBRARIES := ' + ' '.join(modules)) 478 479 self._note() 480 self._note('If the fix above doesn\'t work, bypass this check with:') 481 self._note(' Android.bp: check_elf_files: false,') 482 self._note(' Android.mk: LOCAL_CHECK_ELF_FILES := false') 483 484 sys.exit(2) 485 486 def check_max_page_size(self, max_page_size): 487 if self._file_under_test.header.e_machine in _32_BIT_MACHINES: 488 # Skip test on 32-bit machines. 16 KB pages is an arm64 feature 489 # and no 32-bit systems in Android use it. 490 return 491 492 for alignment in self._file_under_test.alignments: 493 if alignment % max_page_size != 0: 494 self._error(f'Load segment has alignment {alignment} but ' 495 f'{max_page_size} required.') 496 self._note() 497 self._note('Fix suggestions:') 498 self._note(f' use linker flag "-Wl,-z,max-page-size={max_page_size}" ' 499 f'when compiling this lib') 500 self._note() 501 self._note('If the fix above doesn\'t work, bypass this check with:') 502 self._note(' Android.bp: ignore_max_page_size: true,') 503 self._note(' Android.mk: LOCAL_IGNORE_MAX_PAGE_SIZE := true') 504 self._note(' Device mk: PRODUCT_CHECK_PREBUILT_MAX_PAGE_SIZE := false') 505 506 # TODO: instead of exiting immediately, we may want to collect the 507 # errors from all checks and emit them at once 508 sys.exit(2) 509 510 @staticmethod 511 def _find_symbol(lib, name, version): 512 """Check whether the symbol name and version matches a definition in 513 lib.""" 514 try: 515 lib_sym_vers = lib.exported[name] 516 except KeyError: 517 return False 518 if version == '': # Symbol version is not requested 519 return True 520 return version in lib_sym_vers 521 522 523 @classmethod 524 def _find_symbol_from_libs(cls, libs, name, version): 525 """Check whether the symbol name and version is defined in one of the 526 shared libraries in libs.""" 527 for lib in libs: 528 if cls._find_symbol(lib, name, version): 529 return lib 530 return None 531 532 533 def check_symbols(self): 534 """Check whether all undefined symbols are resolved to a definition.""" 535 all_elf_files = [self._file_under_test] + self._shared_libs 536 missing_symbols = [] 537 for sym, imported_vers in self._file_under_test.imported.items(): 538 for imported_ver in imported_vers: 539 lib = self._find_symbol_from_libs(all_elf_files, sym, imported_ver) 540 if not lib: 541 missing_symbols.append((sym, imported_ver)) 542 543 if missing_symbols: 544 for sym, ver in sorted(missing_symbols): 545 if ver: 546 sym += '@' + ver 547 self._error(f'Unresolved symbol: {sym}') 548 549 self._note() 550 self._note('Some dependencies might be changed, thus the symbol(s) ' 551 'above cannot be resolved.') 552 self._note(f'Please re-build the prebuilt file: "{self._file_path}".') 553 554 self._note() 555 self._note('If this is a new prebuilt file and it is designed to have ' 556 'unresolved symbols, add one of the following properties:') 557 self._note(' Android.bp: allow_undefined_symbols: true,') 558 self._note(' Android.mk: LOCAL_ALLOW_UNDEFINED_SYMBOLS := true') 559 560 sys.exit(2) 561 562 563def _parse_args(): 564 """Parse command line options.""" 565 parser = argparse.ArgumentParser() 566 567 # Input file 568 parser.add_argument('file', 569 help='Path to the input file to be checked') 570 parser.add_argument('--soname', 571 help='Shared object name of the input file') 572 573 # Shared library dependencies 574 parser.add_argument('--shared-lib', action='append', default=[], 575 help='Path to shared library dependencies') 576 577 # System Shared library names 578 parser.add_argument('--system-shared-lib', action='append', default=[], 579 help='System shared libraries to be hidden from fix ' 580 'suggestions') 581 582 # Check options 583 parser.add_argument('--skip-bad-elf-magic', action='store_true', 584 help='Ignore the input file without the ELF magic word') 585 parser.add_argument('--skip-unknown-elf-machine', action='store_true', 586 help='Ignore the input file with unknown machine ID') 587 parser.add_argument('--allow-undefined-symbols', action='store_true', 588 help='Ignore unresolved undefined symbols') 589 parser.add_argument('--max-page-size', action='store', type=int, 590 help='Required page size alignment support') 591 592 # Other options 593 parser.add_argument('--llvm-readobj', 594 help='Path to the llvm-readobj executable') 595 596 return parser.parse_args() 597 598 599def main(): 600 """Main function""" 601 args = _parse_args() 602 603 llvm_readobj = args.llvm_readobj 604 if not llvm_readobj: 605 llvm_readobj = _get_llvm_readobj() 606 607 # Load ELF files 608 checker = Checker(llvm_readobj) 609 checker.load_file_under_test( 610 args.file, args.skip_bad_elf_magic, args.skip_unknown_elf_machine) 611 checker.load_shared_libs(args.shared_lib) 612 613 # Run checks 614 if args.soname: 615 checker.check_dt_soname(args.soname) 616 617 checker.check_dt_needed(args.system_shared_lib) 618 619 if args.max_page_size: 620 checker.check_max_page_size(args.max_page_size) 621 622 if not args.allow_undefined_symbols: 623 checker.check_symbols() 624 625 626if __name__ == '__main__': 627 main() 628