#!/usr/bin/env python3
#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""ELF file checker.

This command ensures all undefined symbols in an ELF file can be resolved to
global (or weak) symbols defined in shared objects specified in DT_NEEDED
entries.
"""

from __future__ import print_function

import argparse
import collections
import os
import os.path
import re
import struct
import subprocess
import sys


_ELF_MAGIC = b'\x7fELF'


# Known machines
_EM_386 = 3
_EM_ARM = 40
_EM_X86_64 = 62
_EM_AARCH64 = 183

_KNOWN_MACHINES = {_EM_386, _EM_ARM, _EM_X86_64, _EM_AARCH64}


# ELF header struct: (field name, `struct` format code) for the leading
# fields of the ELF header, in file order.
_ELF_HEADER_STRUCT = (
    ('ei_magic', '4s'),
    ('ei_class', 'B'),
    ('ei_data', 'B'),
    ('ei_version', 'B'),
    ('ei_osabi', 'B'),
    ('ei_pad', '8s'),
    ('e_type', 'H'),
    ('e_machine', 'H'),
    ('e_version', 'I'),
)

# NOTE: the format has no byte-order prefix, so `struct` decodes the
# multi-byte fields with the host's native byte order and alignment.
_ELF_HEADER_STRUCT_FMT = ''.join(_fmt for _, _fmt in _ELF_HEADER_STRUCT)


ELFHeader = collections.namedtuple(
    'ELFHeader', [_name for _name, _ in _ELF_HEADER_STRUCT])


ELF = collections.namedtuple(
    'ELF',
    ('dt_soname', 'dt_needed', 'imported', 'exported', 'header'))


def _get_os_name():
    """Get the host OS name.

    Returns:
      'linux' or 'darwin'.

    Raises:
      ValueError: if `sys.platform` is neither Linux nor Darwin.
    """
    if sys.platform.startswith('linux'):
        return 'linux'
    if sys.platform.startswith('darwin'):
        return 'darwin'
    raise ValueError(sys.platform + ' is not supported')


def _get_build_top():
    """Find the build top of the source tree ($ANDROID_BUILD_TOP).

    Walks from the current working directory towards the file system root
    and returns the first directory that contains a `.repo` entry, or None
    if no such directory exists.
    """
    prev_path = None
    curr_path = os.path.abspath(os.getcwd())
    # `os.path.dirname` is a fixed point at the root, which ends the walk.
    while prev_path != curr_path:
        if os.path.exists(os.path.join(curr_path, '.repo')):
            return curr_path
        prev_path = curr_path
        curr_path = os.path.dirname(curr_path)
    return None


def _select_latest_llvm_version(versions):
    """Select the latest LLVM prebuilts version from a set of versions.

    Only names that start with `clang-r<digits>` are considered. The highest
    numeric revision wins; ties are broken by comparing the full names, so a
    suffixed name such as `clang-r123b` beats `clang-r123`.

    Returns:
      The selected version name, or None if no name matches.
    """
    pattern = re.compile('clang-r([0-9]+)([a-z]?)')
    found_rev = 0
    found_ver = None
    for curr_ver in versions:
        match = pattern.match(curr_ver)
        if not match:
            continue
        curr_rev = int(match.group(1))
        if not found_ver or curr_rev > found_rev or (
                curr_rev == found_rev and curr_ver > found_ver):
            found_rev = curr_rev
            found_ver = curr_ver
    return found_ver


def _get_latest_llvm_version(llvm_dir):
    """Find the latest LLVM prebuilts version from `llvm_dir`."""
    return _select_latest_llvm_version(os.listdir(llvm_dir))


def _get_llvm_dir():
    """Find the path to LLVM prebuilts.

    The location is `<build top>/<base>/<os>-x86/<version>`, where `<base>`
    and `<version>` may be overridden with the LLVM_PREBUILTS_BASE and
    LLVM_PREBUILTS_VERSION environment variables.

    Returns:
      The versioned prebuilts directory, or None if the build top, the
      prebuilts directory, or a usable version cannot be found.
    """
    build_top = _get_build_top()
    if build_top is None:
        # Not inside a source tree; there is nothing to join the base onto.
        return None

    llvm_prebuilts_base = os.environ.get('LLVM_PREBUILTS_BASE')
    if not llvm_prebuilts_base:
        llvm_prebuilts_base = os.path.join('prebuilts', 'clang', 'host')

    llvm_dir = os.path.join(
        build_top, llvm_prebuilts_base, _get_os_name() + '-x86')

    if not os.path.exists(llvm_dir):
        return None

    llvm_prebuilts_version = os.environ.get('LLVM_PREBUILTS_VERSION')
    if not llvm_prebuilts_version:
        llvm_prebuilts_version = _get_latest_llvm_version(llvm_dir)
    if not llvm_prebuilts_version:
        # The directory exists but holds no `clang-r*` version.
        return None

    llvm_dir = os.path.join(llvm_dir, llvm_prebuilts_version)

    if not os.path.exists(llvm_dir):
        return None

    return llvm_dir


def _get_llvm_readobj():
    """Find the path to llvm-readobj executable.

    Returns:
      The path to the prebuilt `llvm-readobj` if it exists; otherwise the
      bare name 'llvm-readobj', leaving the lookup to the caller's PATH.
    """
    llvm_dir = _get_llvm_dir()
    if llvm_dir is None:
        return 'llvm-readobj'
    llvm_readobj = os.path.join(llvm_dir, 'bin', 'llvm-readobj')
    return llvm_readobj if os.path.exists(llvm_readobj) else 'llvm-readobj'


class ELFError(ValueError):
    """Generic ELF parse error"""


class ELFInvalidMagicError(ELFError):
    """Invalid ELF magic word error"""

    def __init__(self):
        super(ELFInvalidMagicError, self).__init__('bad ELF magic')


class ELFParser(object):
    """ELF file parser"""

    @classmethod
    def _read_elf_header(cls, elf_file_path):
        """Read the leading ELF header fields from the beginning of the file.

        Returns:
          An `ELFHeader`, or None if the file is too short to hold one.
        """
        with open(elf_file_path, 'rb') as elf_file:
            buf = elf_file.read(struct.calcsize(_ELF_HEADER_STRUCT_FMT))
            try:
                return ELFHeader(*struct.unpack(_ELF_HEADER_STRUCT_FMT, buf))
            except struct.error:
                return None

    @classmethod
    def open(cls, elf_file_path, llvm_readobj):
        """Open and parse the ELF file.

        Args:
          elf_file_path: Path to the ELF file.
          llvm_readobj: Path (or command name) of the llvm-readobj executable.

        Returns:
          An `ELF` tuple describing the file.

        Raises:
          ELFInvalidMagicError: if the file does not start with a complete
            header carrying the ELF magic word.
        """
        # Parse the ELF header to check the magic word.
        header = cls._read_elf_header(elf_file_path)
        if not header or header.ei_magic != _ELF_MAGIC:
            raise ELFInvalidMagicError()

        # Run llvm-readobj and parse the output.
        return cls._read_llvm_readobj(elf_file_path, header, llvm_readobj)

    @classmethod
    def _find_prefix(cls, pattern, lines_it):
        """Iterate `lines_it` until finding a string that starts with
        `pattern`.

        The iterator is left positioned just after the matching line, or
        exhausted if there is none.

        Returns:
          True if a matching line was found, False otherwise.
        """
        for line in lines_it:
            if line.startswith(pattern):
                return True
        return False

    @classmethod
    def _read_llvm_readobj(cls, elf_file_path, header, llvm_readobj):
        """Run llvm-readobj and parse the output."""
        cmd = [llvm_readobj, '--dynamic-table', '--dyn-symbols', elf_file_path]
        out = subprocess.check_output(cmd, text=True)
        lines = out.splitlines()
        return cls._parse_llvm_readobj(elf_file_path, header, lines)

    @classmethod
    def _parse_llvm_readobj(cls, elf_file_path, header, lines):
        """Parse the output of llvm-readobj.

        Both sections are read from one shared iterator, so the dynamic
        table must precede the dynamic symbols in `lines`.
        """
        lines_it = iter(lines)
        dt_soname, dt_needed = cls._parse_dynamic_table(elf_file_path, lines_it)
        imported, exported = cls._parse_dynamic_symbols(lines_it)
        return ELF(dt_soname, dt_needed, imported, exported, header)

    _DYNAMIC_SECTION_START_PATTERN = 'DynamicSection ['

    # Entries are matched with any non-empty indentation rather than one
    # fixed width, so the parser does not depend on the exact number of
    # spaces llvm-readobj emits.
    _DYNAMIC_SECTION_NEEDED_PATTERN = re.compile(
        r'^\s+0x[0-9a-fA-F]+\s+NEEDED\s+Shared library: \[(.*)\]$')

    _DYNAMIC_SECTION_SONAME_PATTERN = re.compile(
        r'^\s+0x[0-9a-fA-F]+\s+SONAME\s+Library soname: \[(.*)\]$')

    # Section terminators stay exact: only an unindented `]` closes a section.
    _DYNAMIC_SECTION_END_PATTERN = ']'

    @classmethod
    def _parse_dynamic_table(cls, elf_file_path, lines_it):
        """Parse the dynamic table section.

        Returns:
          A `(dt_soname, dt_needed)` tuple. `dt_soname` falls back to the
          base name of `elf_file_path` when no SONAME entry is present;
          `dt_needed` lists the NEEDED entries in order of appearance.
        """
        dt_soname = os.path.basename(elf_file_path)
        dt_needed = []

        dynamic = cls._find_prefix(cls._DYNAMIC_SECTION_START_PATTERN, lines_it)
        if not dynamic:
            return (dt_soname, dt_needed)

        for line in lines_it:
            if line == cls._DYNAMIC_SECTION_END_PATTERN:
                break

            match = cls._DYNAMIC_SECTION_NEEDED_PATTERN.match(line)
            if match:
                dt_needed.append(match.group(1))
                continue

            match = cls._DYNAMIC_SECTION_SONAME_PATTERN.match(line)
            if match:
                dt_soname = match.group(1)
                continue

        return (dt_soname, dt_needed)

    _DYNAMIC_SYMBOLS_START_PATTERN = 'DynamicSymbols ['
    _DYNAMIC_SYMBOLS_END_PATTERN = ']'

    _SYMBOL_ENTRY_START_PATTERN = re.compile(r'^\s+Symbol \{$')
    _SYMBOL_ENTRY_PATTERN = re.compile(r'^\s+([A-Za-z0-9_]+): (.*)$')
    # Trailing ` (<decimal>)` or ` (0x<hex>)` annotation after a field value.
    _SYMBOL_ENTRY_PAREN_PATTERN = re.compile(
        r'\s+\((?:(?:\d+)|(?:0x[0-9a-fA-F]+))\)$')
    _SYMBOL_ENTRY_END_PATTERN = re.compile(r'^\s+\}$')

    @staticmethod
    def _parse_symbol_name(name_with_version):
        """Split `name_with_version` into name and version. This function may
        split at last occurrence of `@@` or `@`.

        Returns:
          A `(name, version)` tuple; `version` is '' when there is no `@`.
        """
        pos = name_with_version.rfind('@')
        if pos == -1:
            name = name_with_version
            version = ''
        else:
            if pos > 0 and name_with_version[pos - 1] == '@':
                name = name_with_version[0:pos - 1]
            else:
                name = name_with_version[0:pos]
            version = name_with_version[pos + 1:]
        return (name, version)

    @classmethod
    def _parse_dynamic_symbols(cls, lines_it):
        """Parse dynamic symbol table and collect imported and exported
        symbols.

        Undefined symbols are imported unless their binding is Weak; defined
        symbols are exported unless their binding is Local. Symbols with an
        empty name are ignored.

        Returns:
          An `(imported, exported)` tuple of dicts that map a symbol name to
          the set of versions seen for it.
        """
        imported = collections.defaultdict(set)
        exported = collections.defaultdict(set)

        for symbol in cls._parse_dynamic_symbols_internal(lines_it):
            name, version = cls._parse_symbol_name(symbol['Name'])
            if name:
                if symbol['Section'] == 'Undefined':
                    if symbol['Binding'] != 'Weak':
                        imported[name].add(version)
                else:
                    if symbol['Binding'] != 'Local':
                        exported[name].add(version)

        # Freeze the returned imported/exported dict.
        return (dict(imported), dict(exported))

    @classmethod
    def _parse_dynamic_symbols_internal(cls, lines_it):
        """Parse symbols entries and yield each symbols.

        Each yielded item is a dict that maps a field name (e.g. `Name`,
        `Binding`, `Section`) to its value with the trailing parenthesized
        number removed.
        """
        if not cls._find_prefix(cls._DYNAMIC_SYMBOLS_START_PATTERN, lines_it):
            return

        # Entry being collected; None while outside a `Symbol { ... }` entry.
        symbol = None

        for line in lines_it:
            if line == cls._DYNAMIC_SYMBOLS_END_PATTERN:
                return

            if cls._SYMBOL_ENTRY_START_PATTERN.match(line):
                symbol = {}
                continue

            if cls._SYMBOL_ENTRY_END_PATTERN.match(line):
                if symbol is not None:
                    yield symbol
                symbol = None
                continue

            if symbol is None:
                # Stray line outside any entry; nothing to attach it to.
                continue

            match = cls._SYMBOL_ENTRY_PATTERN.match(line)
            if match:
                key = match.group(1)
                value = cls._SYMBOL_ENTRY_PAREN_PATTERN.sub('', match.group(2))
                symbol[key] = value
                continue


class Checker(object):
    """ELF file checker that checks DT_SONAME, DT_NEEDED, and symbols."""

    def __init__(self, llvm_readobj):
        self._file_path = ''
        self._file_under_test = None
        self._shared_libs = []

        self._llvm_readobj = llvm_readobj

    # Use ANSI colors only when stderr is a terminal.
    if sys.stderr.isatty():
        _ERROR_TAG = '\033[0;1;31merror:\033[m'  # Red error
        _NOTE_TAG = '\033[0;1;30mnote:\033[m'  # Black note
    else:
        _ERROR_TAG = 'error:'  # Plain error
        _NOTE_TAG = 'note:'  # Plain note

    def _error(self, *args):
        """Emit an error to stderr."""
        print(self._file_path + ': ' + self._ERROR_TAG, *args, file=sys.stderr)

    def _note(self, *args):
        """Emit a note to stderr."""
        print(self._file_path + ': ' + self._NOTE_TAG, *args, file=sys.stderr)

    def _load_elf_file(self, path, skip_bad_elf_magic):
        """Load an ELF file from the `path`.

        Exits the process with status 2 when the file cannot be opened or
        has a bad magic word; with `skip_bad_elf_magic`, a bad magic word
        exits with status 0 instead. Any other error is reported and
        re-raised.
        """
        try:
            return ELFParser.open(path, self._llvm_readobj)
        except (IOError, OSError):
            self._error('Failed to open "{}".'.format(path))
            sys.exit(2)
        except ELFInvalidMagicError:
            if skip_bad_elf_magic:
                sys.exit(0)
            else:
                self._error(
                    'File "{}" must have a valid ELF magic word.'.format(path))
                sys.exit(2)
        except Exception:
            # KeyboardInterrupt and SystemExit are not "unknown errors"; they
            # propagate without this message.
            self._error(
                'An unknown error occurred while opening "{}".'.format(path))
            raise

    def load_file_under_test(self, path, skip_bad_elf_magic,
                             skip_unknown_elf_machine):
        """Load file-under-test (either an executable or a shared lib).

        With `skip_unknown_elf_machine`, exits the process with status 0 when
        the file's `e_machine` is not one of the known machines.
        """
        self._file_path = path
        self._file_under_test = self._load_elf_file(path, skip_bad_elf_magic)

        if skip_unknown_elf_machine and \
                self._file_under_test.header.e_machine not in _KNOWN_MACHINES:
            sys.exit(0)

    def load_shared_libs(self, shared_lib_paths):
        """Load shared libraries."""
        for path in shared_lib_paths:
            self._shared_libs.append(self._load_elf_file(path, False))

    def check_dt_soname(self, soname):
        """Check whether DT_SONAME matches installation file name.

        Exits the process with status 2 on mismatch.
        """
        if self._file_under_test.dt_soname != soname:
            self._error('DT_SONAME "{}" must be equal to the file name "{}".'
                        .format(self._file_under_test.dt_soname, soname))
            sys.exit(2)

    def check_dt_needed(self, system_shared_lib_names):
        """Check whether all DT_NEEDED entries are specified in the build
        system.

        Reports every DT_NEEDED entry that has no loaded shared library with
        a matching DT_SONAME, prints fix suggestions, and exits the process
        with status 2 if any entry is missing.
        """
        missing_shared_libs = False

        # Collect the DT_SONAMEs from shared libs specified in the build
        # system.
        specified_sonames = {lib.dt_soname for lib in self._shared_libs}

        # Check whether all DT_NEEDED entries are specified.
        for lib in self._file_under_test.dt_needed:
            if lib not in specified_sonames:
                self._error(
                    f'DT_NEEDED "{lib}" is not specified in shared_libs.')
                missing_shared_libs = True

        if missing_shared_libs:
            dt_needed = sorted(set(self._file_under_test.dt_needed))
            modules = [re.sub('\\.so$', '', lib) for lib in dt_needed]

            # Remove system shared libraries from the suggestion since they
            # are added by default.
            modules = [name for name in modules
                       if name not in system_shared_lib_names]

            self._note()
            self._note('Fix suggestions:')
            self._note(
                ' Android.bp: shared_libs: [' +
                ', '.join('"' + module + '"' for module in modules) + '],')
            self._note(
                ' Android.mk: LOCAL_SHARED_LIBRARIES := ' + ' '.join(modules))

            self._note()
            self._note('If the fix above doesn\'t work, bypass this check with:')
            self._note(' Android.bp: check_elf_files: false,')
            self._note(' Android.mk: LOCAL_CHECK_ELF_FILES := false')

            sys.exit(2)

    @staticmethod
    def _find_symbol(lib, name, version):
        """Check whether the symbol name and version matches a definition in
        lib."""
        try:
            lib_sym_vers = lib.exported[name]
        except KeyError:
            return False
        if version == '':  # Symbol version is not requested
            return True
        return version in lib_sym_vers

    @classmethod
    def _find_symbol_from_libs(cls, libs, name, version):
        """Check whether the symbol name and version is defined in one of the
        shared libraries in libs.

        Returns:
          The first library in `libs` that defines the symbol, or None.
        """
        for lib in libs:
            if cls._find_symbol(lib, name, version):
                return lib
        return None

    def check_symbols(self):
        """Check whether all undefined symbols are resolved to a definition.

        Symbols are looked up in the file under test and then in the loaded
        shared libraries. Unresolved symbols are reported and the process
        exits with status 2.
        """
        all_elf_files = [self._file_under_test] + self._shared_libs
        missing_symbols = []
        for sym, imported_vers in self._file_under_test.imported.items():
            for imported_ver in imported_vers:
                lib = self._find_symbol_from_libs(
                    all_elf_files, sym, imported_ver)
                if not lib:
                    missing_symbols.append((sym, imported_ver))

        if missing_symbols:
            for sym, ver in sorted(missing_symbols):
                if ver:
                    sym += '@' + ver
                self._error(f'Unresolved symbol: {sym}')

            self._note()
            self._note('Some dependencies might be changed, thus the symbol(s) '
                       'above cannot be resolved.')
            self._note('Please re-build the prebuilt file: '
                       f'"{self._file_path}".')

            self._note()
            self._note('If this is a new prebuilt file and it is designed to '
                       'have unresolved symbols, add one of the following '
                       'properties:')
            self._note(' Android.bp: allow_undefined_symbols: true,')
            self._note(' Android.mk: LOCAL_ALLOW_UNDEFINED_SYMBOLS := true')

            sys.exit(2)


def _parse_args():
    """Parse command line options."""
    parser = argparse.ArgumentParser()

    # Input file
    parser.add_argument('file',
                        help='Path to the input file to be checked')
    parser.add_argument('--soname',
                        help='Shared object name of the input file')

    # Shared library dependencies
    parser.add_argument('--shared-lib', action='append', default=[],
                        help='Path to shared library dependencies')

    # System Shared library names
    parser.add_argument('--system-shared-lib', action='append', default=[],
                        help='System shared libraries to be hidden from fix '
                        'suggestions')

    # Check options
    parser.add_argument('--skip-bad-elf-magic', action='store_true',
                        help='Ignore the input file without the ELF magic word')
    parser.add_argument('--skip-unknown-elf-machine', action='store_true',
                        help='Ignore the input file with unknown machine ID')
    parser.add_argument('--allow-undefined-symbols', action='store_true',
                        help='Ignore unresolved undefined symbols')

    # Other options
    parser.add_argument('--llvm-readobj',
                        help='Path to the llvm-readobj executable')

    return parser.parse_args()


def main():
    """Main function"""
    args = _parse_args()

    llvm_readobj = args.llvm_readobj
    if not llvm_readobj:
        llvm_readobj = _get_llvm_readobj()

    # Load ELF files
    checker = Checker(llvm_readobj)
    checker.load_file_under_test(
        args.file, args.skip_bad_elf_magic, args.skip_unknown_elf_machine)
    checker.load_shared_libs(args.shared_lib)

    # Run checks
    if args.soname:
        checker.check_dt_soname(args.soname)

    checker.check_dt_needed(args.system_shared_lib)

    if not args.allow_undefined_symbols:
        checker.check_symbols()


if __name__ == '__main__':
    main()