• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/env python3
#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""ELF file checker.

This command ensures all undefined symbols in an ELF file can be resolved to
global (or weak) symbols defined in shared objects specified in DT_NEEDED
entries.
"""
23
24from __future__ import print_function
25
26import argparse
27import collections
28import os
29import os.path
30import re
31import struct
32import subprocess
33import sys
34
35
# Magic word expected in the `ei_magic` field of an ELF header.
_ELF_MAGIC = b'\x7fELF'


# Known machines (values of the `e_machine` header field)
_EM_386 = 3
_EM_ARM = 40
_EM_X86_64 = 62
_EM_AARCH64 = 183

_32_BIT_MACHINES = {_EM_386, _EM_ARM}
_64_BIT_MACHINES = {_EM_X86_64, _EM_AARCH64}
_KNOWN_MACHINES = _32_BIT_MACHINES.union(_64_BIT_MACHINES)

# ELF header struct: (field name, `struct` format code) for each field that is
# read from the beginning of the file.
_ELF_HEADER_STRUCT = (
  ('ei_magic', '4s'),
  ('ei_class', 'B'),
  ('ei_data', 'B'),
  ('ei_version', 'B'),
  ('ei_osabi', 'B'),
  ('ei_pad', '8s'),
  ('e_type', 'H'),
  ('e_machine', 'H'),
  ('e_version', 'I'),
)

# Format string for `struct.unpack`, built by concatenating the format codes.
_ELF_HEADER_STRUCT_FMT = ''.join(_field[1] for _field in _ELF_HEADER_STRUCT)


# Unpacked ELF header; the fields follow the order of `_ELF_HEADER_STRUCT`.
ELFHeader = collections.namedtuple(
  'ELFHeader', [_field[0] for _field in _ELF_HEADER_STRUCT])


# Information collected about one ELF file.
ELF = collections.namedtuple(
  'ELF',
  [
    'alignments',
    'dt_soname',
    'dt_needed',
    'imported',
    'exported',
    'header',
  ])
72
73
def _get_os_name():
  """Return the host OS name: 'linux' or 'darwin'.

  Raises:
    ValueError: if `sys.platform` names any other platform.
  """
  for os_name in ('linux', 'darwin'):
    if sys.platform.startswith(os_name):
      return os_name
  raise ValueError(sys.platform + ' is not supported')
81
82
def _get_build_top():
  """Find the top of the source tree by searching upward for `.repo`.

  The search starts at the current working directory and visits each parent
  directory in turn.

  Returns:
    The absolute path of the nearest directory containing a `.repo` entry, or
    None if no such directory exists up to the filesystem root.
  """
  candidate = os.path.abspath(os.getcwd())
  while True:
    if os.path.exists(os.path.join(candidate, '.repo')):
      return candidate
    parent = os.path.dirname(candidate)
    if parent == candidate:
      # `dirname` no longer changes the path: the root has been checked.
      return None
    candidate = parent
93
94
def _select_latest_llvm_version(versions):
  """Select the latest LLVM prebuilts version from a set of versions.

  Only names starting with `clang-r<digits>` are considered. Candidates are
  ordered by revision number first and by the whole name second, so that
  `clang-r5b` is preferred over `clang-r5`.

  Returns:
    The selected version name, or None if no name matches.
  """
  pattern = re.compile('clang-r([0-9]+)([a-z]?)')
  candidates = []
  for name in versions:
    found = pattern.match(name)
    if found:
      candidates.append((int(found.group(1)), name))
  if not candidates:
    return None
  # Tuples compare by revision number, then by name.
  return max(candidates)[1]
110
111
def _get_latest_llvm_version(llvm_dir):
  """Pick the latest LLVM prebuilts version among the entries of `llvm_dir`."""
  available_versions = os.listdir(llvm_dir)
  return _select_latest_llvm_version(available_versions)
115
116
def _get_llvm_dir():
  """Find the path to LLVM prebuilts.

  The directory is `<build top>/<base>/<os>-x86/<version>`, where `<base>` is
  taken from $LLVM_PREBUILTS_BASE (default: `prebuilts/clang/host`) and
  `<version>` from $LLVM_PREBUILTS_VERSION (default: the latest `clang-r*`
  entry found in the directory).

  Returns:
    The path to the LLVM prebuilts directory, or None if the build top, the
    prebuilts directory, or a usable version cannot be found.

  Raises:
    ValueError: if the host OS is not supported.
  """
  build_top = _get_build_top()
  if not build_top:
    # Not inside a source tree. Joining None with path components would raise
    # TypeError, so report "not found" instead.
    return None

  llvm_prebuilts_base = os.environ.get('LLVM_PREBUILTS_BASE')
  if not llvm_prebuilts_base:
    llvm_prebuilts_base = os.path.join('prebuilts', 'clang', 'host')

  llvm_dir = os.path.join(
    build_top, llvm_prebuilts_base, _get_os_name() + '-x86')

  if not os.path.exists(llvm_dir):
    return None

  llvm_prebuilts_version = os.environ.get('LLVM_PREBUILTS_VERSION')
  if not llvm_prebuilts_version:
    llvm_prebuilts_version = _get_latest_llvm_version(llvm_dir)
  if not llvm_prebuilts_version:
    # The directory exists but holds no `clang-r*` entry.
    return None

  llvm_dir = os.path.join(llvm_dir, llvm_prebuilts_version)

  if not os.path.exists(llvm_dir):
    return None

  return llvm_dir
141
142
def _get_llvm_readobj():
  """Find the path to llvm-readobj executable.

  Returns:
    The path to `bin/llvm-readobj` inside the LLVM prebuilts directory if it
    exists; otherwise the bare command name 'llvm-readobj'.
  """
  llvm_dir = _get_llvm_dir()
  # `_get_llvm_dir` returns None when the prebuilts cannot be located; joining
  # None with path components would raise TypeError.
  if llvm_dir:
    llvm_readobj = os.path.join(llvm_dir, 'bin', 'llvm-readobj')
    if os.path.exists(llvm_readobj):
      return llvm_readobj
  return 'llvm-readobj'
148
149
class ELFError(ValueError):
  """Base class of the errors raised while parsing an ELF file."""
153
154
class ELFInvalidMagicError(ELFError):
  """Error raised for a file without a valid ELF magic word."""

  def __init__(self):
    # The message is fixed; callers construct this error without arguments.
    super().__init__('bad ELF magic')
159
160
class ELFParser(object):
  """ELF file parser.

  The ELF header is read directly from the file. Program headers, the dynamic
  table and the dynamic symbols are extracted from the text output of
  `llvm-readobj`.
  """

  @classmethod
  def _read_elf_header(cls, elf_file_path):
    """Read the leading ELF header fields from the beginning of the file.

    Returns:
      An `ELFHeader`, or None if the file is too short to hold the header.
    """
    header_size = struct.calcsize(_ELF_HEADER_STRUCT_FMT)
    with open(elf_file_path, 'rb') as elf_file:
      buf = elf_file.read(header_size)
    try:
      return ELFHeader(*struct.unpack(_ELF_HEADER_STRUCT_FMT, buf))
    except struct.error:
      return None


  @classmethod
  def open(cls, elf_file_path, llvm_readobj):
    """Open and parse the ELF file.

    Args:
      elf_file_path: Path to the ELF file.
      llvm_readobj: Path to (or name of) the llvm-readobj executable.

    Returns:
      An `ELF` tuple describing the file.

    Raises:
      ELFInvalidMagicError: if the file does not start with the ELF magic word.
    """
    # Parse the ELF header to check the magic word.
    header = cls._read_elf_header(elf_file_path)
    if not header or header.ei_magic != _ELF_MAGIC:
      raise ELFInvalidMagicError()

    # Run llvm-readobj and parse the output.
    return cls._read_llvm_readobj(elf_file_path, header, llvm_readobj)


  @classmethod
  def _find_prefix(cls, pattern, lines_it):
    """Advance `lines_it` past the first line that starts with `pattern`.

    Returns:
      True if such a line was found; False if `lines_it` was exhausted first.
    """
    return any(line.startswith(pattern) for line in lines_it)


  @classmethod
  def _read_llvm_readobj(cls, elf_file_path, header, llvm_readobj):
    """Run llvm-readobj and parse the output."""
    cmd = [llvm_readobj, '--program-headers', '--dynamic-table',
           '--dyn-symbols', elf_file_path]
    out = subprocess.check_output(cmd, text=True)
    lines = out.splitlines()
    return cls._parse_llvm_readobj(elf_file_path, header, lines)


  @classmethod
  def _parse_llvm_readobj(cls, elf_file_path, header, lines):
    """Parse the output of llvm-readobj."""
    # All three parsers consume the same iterator, so the sections must be
    # parsed in the order in which they appear in the output.
    lines_it = iter(lines)
    alignments = cls._parse_program_headers(lines_it)
    dt_soname, dt_needed = cls._parse_dynamic_table(elf_file_path, lines_it)
    imported, exported = cls._parse_dynamic_symbols(lines_it)
    return ELF(alignments, dt_soname, dt_needed, imported, exported, header)


  _PROGRAM_HEADERS_START_PATTERN = 'ProgramHeaders ['
  _PROGRAM_HEADERS_END_PATTERN = ']'
  _PROGRAM_HEADER_START_PATTERN = 'ProgramHeader {'
  _PROGRAM_HEADER_TYPE_PATTERN = re.compile(r'^\s+Type:\s+(.*)$')
  _PROGRAM_HEADER_ALIGN_PATTERN = re.compile(r'^\s+Alignment:\s+(.*)$')
  _PROGRAM_HEADER_END_PATTERN = '}'


  @classmethod
  def _parse_program_headers(cls, lines_it):
    """Parse the program headers and collect the PT_LOAD alignments.

    Returns:
      A list with the alignment (as an int) of every PT_LOAD program header,
      in the order of appearance.

    Raises:
      ELFError: if the program headers cannot be found, or if a program header
        has no type or no alignment.
    """
    alignments = []

    if not cls._find_prefix(cls._PROGRAM_HEADERS_START_PATTERN, lines_it):
      raise ELFError('Could not find program headers!')

    for line in lines_it:
      if line == cls._PROGRAM_HEADERS_END_PATTERN:
        break
      if line.strip() != cls._PROGRAM_HEADER_START_PATTERN:
        continue

      # Parse each program header
      p_align = None
      p_type = None
      for header_line in lines_it:
        if header_line.strip() == cls._PROGRAM_HEADER_END_PATTERN:
          if not p_align:
            raise ELFError("Could not parse alignment from program header!")
          if not p_type:
            raise ELFError("Could not parse type from program header!")

          if p_type.startswith("PT_LOAD "):
            alignments.append(int(p_align))
          break

        match = cls._PROGRAM_HEADER_TYPE_PATTERN.match(header_line)
        if match:
          p_type = match.group(1)

        match = cls._PROGRAM_HEADER_ALIGN_PATTERN.match(header_line)
        if match:
          p_align = match.group(1)

    return alignments


  _DYNAMIC_SECTION_START_PATTERN = 'DynamicSection ['

  _DYNAMIC_SECTION_NEEDED_PATTERN = re.compile(
    r'^  0x[0-9a-fA-F]+\s+NEEDED\s+Shared library: \[(.*)\]$')

  _DYNAMIC_SECTION_SONAME_PATTERN = re.compile(
    r'^  0x[0-9a-fA-F]+\s+SONAME\s+Library soname: \[(.*)\]$')

  _DYNAMIC_SECTION_END_PATTERN = ']'


  @classmethod
  def _parse_dynamic_table(cls, elf_file_path, lines_it):
    """Parse the dynamic table section.

    Returns:
      A `(dt_soname, dt_needed)` tuple. `dt_soname` falls back to the base name
      of `elf_file_path` when the file has no SONAME entry (or no dynamic
      section at all); `dt_needed` lists the NEEDED entries in order.
    """
    dt_soname = os.path.basename(elf_file_path)
    dt_needed = []

    dynamic = cls._find_prefix(cls._DYNAMIC_SECTION_START_PATTERN, lines_it)
    if not dynamic:
      return (dt_soname, dt_needed)

    for line in lines_it:
      if line == cls._DYNAMIC_SECTION_END_PATTERN:
        break

      match = cls._DYNAMIC_SECTION_NEEDED_PATTERN.match(line)
      if match:
        dt_needed.append(match.group(1))
        continue

      match = cls._DYNAMIC_SECTION_SONAME_PATTERN.match(line)
      if match:
        dt_soname = match.group(1)
        continue

    return (dt_soname, dt_needed)


  _DYNAMIC_SYMBOLS_START_PATTERN = 'DynamicSymbols ['
  _DYNAMIC_SYMBOLS_END_PATTERN = ']'

  _SYMBOL_ENTRY_START_PATTERN = '  Symbol {'
  _SYMBOL_ENTRY_PATTERN = re.compile(r'^    ([A-Za-z0-9_]+): (.*)$')
  _SYMBOL_ENTRY_PAREN_PATTERN = re.compile(
    r'\s+\((?:(?:\d+)|(?:0x[0-9a-fA-F]+))\)$')
  _SYMBOL_ENTRY_END_PATTERN = '  }'


  @staticmethod
  def _parse_symbol_name(name_with_version):
    """Split `name_with_version` into name and version. This function may split
    at last occurrence of `@@` or `@`."""
    name, separator, version = name_with_version.rpartition('@')
    if not separator:
      # No version suffix at all.
      return (name_with_version, '')
    if name.endswith('@'):
      # The separator was `@@`; drop its first character from the name.
      name = name[:-1]
    return (name, version)


  @classmethod
  def _parse_dynamic_symbols(cls, lines_it):
    """Parse dynamic symbol table and collect imported and exported symbols.

    Returns:
      An `(imported, exported)` tuple of dicts that map a symbol name to the
      set of its versions. Undefined weak symbols are not counted as imported
      and local symbols are not counted as exported.
    """
    imported = collections.defaultdict(set)
    exported = collections.defaultdict(set)

    for symbol in cls._parse_dynamic_symbols_internal(lines_it):
      name, version = cls._parse_symbol_name(symbol['Name'])
      if not name:
        continue
      if symbol['Section'] == 'Undefined':
        if symbol['Binding'] != 'Weak':
          imported[name].add(version)
      elif symbol['Binding'] != 'Local':
        exported[name].add(version)

    # Freeze the returned imported/exported dict.
    return (dict(imported), dict(exported))


  @classmethod
  def _parse_dynamic_symbols_internal(cls, lines_it):
    """Parse symbol entries and yield each symbol as a dict.

    Each yielded dict maps an attribute name (e.g. `Name`) to its value with
    the trailing parenthesized number removed.
    """

    if not cls._find_prefix(cls._DYNAMIC_SYMBOLS_START_PATTERN, lines_it):
      return

    symbol = None
    for line in lines_it:
      if line == cls._DYNAMIC_SYMBOLS_END_PATTERN:
        return

      if line == cls._SYMBOL_ENTRY_START_PATTERN:
        symbol = {}
        continue

      if symbol is None:
        # Not inside a `Symbol {` entry: skip the line like any other
        # unrecognized line instead of failing on an unset `symbol`.
        continue

      if line == cls._SYMBOL_ENTRY_END_PATTERN:
        yield symbol
        symbol = None
        continue

      match = cls._SYMBOL_ENTRY_PATTERN.match(line)
      if match:
        key = match.group(1)
        value = cls._SYMBOL_ENTRY_PAREN_PATTERN.sub('', match.group(2))
        symbol[key] = value
373
374
class Checker(object):
  """ELF file checker that checks DT_SONAME, DT_NEEDED, and symbols.

  Diagnostics are written to stderr. A failed check terminates the process
  with exit status 2.
  """

  def __init__(self, llvm_readobj):
    """Create a checker that runs `llvm_readobj` to inspect ELF files."""
    self._file_path = ''
    self._file_under_test = None
    self._shared_libs = []

    self._llvm_readobj = llvm_readobj


  # Only colorize the diagnostic tags when stderr is a terminal.
  if sys.stderr.isatty():
    _ERROR_TAG = '\033[0;1;31merror:\033[m'  # Red error
    _NOTE_TAG = '\033[0;1;30mnote:\033[m'  # Black note
  else:
    _ERROR_TAG = 'error:'
    _NOTE_TAG = 'note:'


  def _error(self, *args):
    """Emit an error to stderr."""
    print(self._file_path + ': ' + self._ERROR_TAG, *args, file=sys.stderr)


  def _note(self, *args):
    """Emit a note to stderr."""
    print(self._file_path + ': ' + self._NOTE_TAG, *args, file=sys.stderr)


  def _load_elf_file(self, path, skip_bad_elf_magic):
    """Load an ELF file from the `path`.

    Args:
      path: Path to the ELF file.
      skip_bad_elf_magic: If true, exit with status 0 (instead of reporting an
        error) when the file has no valid ELF magic word.

    Returns:
      The parsed ELF file.
    """
    try:
      return ELFParser.open(path, self._llvm_readobj)
    except (IOError, OSError):
      self._error(f'Failed to open "{path}".')
      sys.exit(2)
    except ELFInvalidMagicError:
      if skip_bad_elf_magic:
        sys.exit(0)
      self._error(f'File "{path}" must have a valid ELF magic word.')
      sys.exit(2)
    except Exception:
      # Unlike a bare `except:`, this does not mislabel KeyboardInterrupt or
      # SystemExit as an unknown error.
      self._error(f'An unknown error occurred while opening "{path}".')
      raise


  def load_file_under_test(self, path, skip_bad_elf_magic,
                           skip_unknown_elf_machine):
    """Load file-under-test (either an executable or a shared lib).

    Exits with status 0 when `skip_unknown_elf_machine` is true and the
    `e_machine` of the file is not one of the known machines.
    """
    self._file_path = path
    self._file_under_test = self._load_elf_file(path, skip_bad_elf_magic)

    if not skip_unknown_elf_machine:
      return
    if self._file_under_test.header.e_machine not in _KNOWN_MACHINES:
      sys.exit(0)


  def load_shared_libs(self, shared_lib_paths):
    """Load shared libraries."""
    for path in shared_lib_paths:
      self._shared_libs.append(self._load_elf_file(path, False))


  def check_dt_soname(self, soname):
    """Check whether DT_SONAME matches installation file name."""
    actual_soname = self._file_under_test.dt_soname
    if actual_soname != soname:
      self._error(f'DT_SONAME "{actual_soname}" must be equal to the file '
                  f'name "{soname}".')
      sys.exit(2)


  def check_dt_needed(self, system_shared_lib_names):
    """Check whether all DT_NEEDED entries are specified in the build
    system.

    Args:
      system_shared_lib_names: Module names that are left out of the fix
        suggestions.
    """

    missing_shared_libs = False

    # Collect the DT_SONAMEs from shared libs specified in the build system.
    specified_sonames = {lib.dt_soname for lib in self._shared_libs}

    # Check whether all DT_NEEDED entries are specified.
    for lib in self._file_under_test.dt_needed:
      if lib not in specified_sonames:
        self._error(f'DT_NEEDED "{lib}" is not specified in shared_libs.')
        missing_shared_libs = True

    if not missing_shared_libs:
      return

    dt_needed = sorted(set(self._file_under_test.dt_needed))
    modules = [re.sub(r'\.so$', '', lib) for lib in dt_needed]

    # Remove system shared libraries from the suggestion since they are added
    # by default.
    modules = [name for name in modules
               if name not in system_shared_lib_names]

    self._note()
    self._note('Fix suggestions:')
    self._note(
      '  Android.bp: shared_libs: [' +
      ', '.join('"' + module + '"' for module in modules) + '],')
    self._note(
      '  Android.mk: LOCAL_SHARED_LIBRARIES := ' + ' '.join(modules))

    self._note()
    self._note('If the fix above doesn\'t work, bypass this check with:')
    self._note('  Android.bp: check_elf_files: false,')
    self._note('  Android.mk: LOCAL_CHECK_ELF_FILES := false')

    sys.exit(2)


  def check_max_page_size(self, max_page_size):
    """Check whether every collected load segment alignment is a multiple of
    `max_page_size`.

    The check is skipped for files built for a 32-bit machine.
    """
    if self._file_under_test.header.e_machine in _32_BIT_MACHINES:
      # Skip test on 32-bit machines. 16 KB pages is an arm64 feature
      # and no 32-bit systems in Android use it.
      return

    for alignment in self._file_under_test.alignments:
      if alignment % max_page_size == 0:
        continue

      self._error(f'Load segment has alignment {alignment} but '
                  f'{max_page_size} required.')
      self._note()
      self._note('Fix suggestions:')
      self._note(f'  use linker flag "-Wl,-z,max-page-size={max_page_size}" '
                 f'when compiling this lib')
      self._note()
      self._note('If the fix above doesn\'t work, bypass this check with:')
      self._note('  Android.bp: ignore_max_page_size: true,')
      self._note('  Android.mk: LOCAL_IGNORE_MAX_PAGE_SIZE := true')
      self._note('  Device mk: PRODUCT_CHECK_PREBUILT_MAX_PAGE_SIZE := false')

      # TODO: instead of exiting immediately, we may want to collect the
      # errors from all checks and emit them at once
      sys.exit(2)


  @staticmethod
  def _find_symbol(lib, name, version):
    """Check whether the symbol name and version matches a definition in
    lib."""
    lib_sym_vers = lib.exported.get(name)
    if lib_sym_vers is None:
      return False
    if version == '':  # Symbol version is not requested
      return True
    return version in lib_sym_vers


  @classmethod
  def _find_symbol_from_libs(cls, libs, name, version):
    """Check whether the symbol name and version is defined in one of the
    shared libraries in libs.

    Returns:
      The first library that defines the symbol, or None.
    """
    for lib in libs:
      if cls._find_symbol(lib, name, version):
        return lib
    return None


  def check_symbols(self):
    """Check whether all undefined symbols are resolved to a definition."""
    # A symbol may also be resolved by the file under test itself.
    all_elf_files = [self._file_under_test] + self._shared_libs
    missing_symbols = []
    for sym, imported_vers in self._file_under_test.imported.items():
      for imported_ver in imported_vers:
        lib = self._find_symbol_from_libs(all_elf_files, sym, imported_ver)
        if not lib:
          missing_symbols.append((sym, imported_ver))

    if not missing_symbols:
      return

    for sym, ver in sorted(missing_symbols):
      if ver:
        sym += '@' + ver
      self._error(f'Unresolved symbol: {sym}')

    self._note()
    self._note('Some dependencies might be changed, thus the symbol(s) '
               'above cannot be resolved.')
    self._note(f'Please re-build the prebuilt file: "{self._file_path}".')

    self._note()
    self._note('If this is a new prebuilt file and it is designed to have '
               'unresolved symbols, add one of the following properties:')
    self._note('  Android.bp: allow_undefined_symbols: true,')
    self._note('  Android.mk: LOCAL_ALLOW_UNDEFINED_SYMBOLS := true')

    sys.exit(2)
561
562
def _parse_args():
  """Parse the command line options from `sys.argv`.

  Returns:
    The `argparse.Namespace` holding the parsed options.
  """
  parser = argparse.ArgumentParser()
  add = parser.add_argument

  # Input file
  add('file', help='Path to the input file to be checked')
  add('--soname', help='Shared object name of the input file')

  # Shared library dependencies
  add('--shared-lib', action='append', default=[],
      help='Path to shared library dependencies')

  # System Shared library names
  add('--system-shared-lib', action='append', default=[],
      help='System shared libraries to be hidden from fix '
      'suggestions')

  # Check options
  boolean_flags = (
    ('--skip-bad-elf-magic',
     'Ignore the input file without the ELF magic word'),
    ('--skip-unknown-elf-machine',
     'Ignore the input file with unknown machine ID'),
    ('--allow-undefined-symbols',
     'Ignore unresolved undefined symbols'),
  )
  for flag, help_text in boolean_flags:
    add(flag, action='store_true', help=help_text)
  add('--max-page-size', type=int,
      help='Required page size alignment support')

  # Other options
  add('--llvm-readobj', help='Path to the llvm-readobj executable')

  return parser.parse_args()
597
598
def main():
  """Parse the command line, load the ELF files and run the checks."""
  args = _parse_args()

  # Use the executable given on the command line, or locate one otherwise.
  checker = Checker(args.llvm_readobj or _get_llvm_readobj())

  # Load ELF files
  checker.load_file_under_test(
    args.file, args.skip_bad_elf_magic, args.skip_unknown_elf_machine)
  checker.load_shared_libs(args.shared_lib)

  # Run checks; each check exits the process on failure.
  if args.soname:
    checker.check_dt_soname(args.soname)

  checker.check_dt_needed(args.system_shared_lib)

  if args.max_page_size:
    checker.check_max_page_size(args.max_page_size)

  if args.allow_undefined_symbols:
    return
  checker.check_symbols()
624
625
# Run the checker only when this file is executed as a script.
if __name__ == '__main__':
  main()
628