• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# Copyright (C) 2019 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""ELF file checker.
18
19This command ensures all undefined symbols in an ELF file can be resolved to
20global (or weak) symbols defined in shared objects specified in DT_NEEDED
21entries.
22"""
23
24from __future__ import print_function
25
26import argparse
27import collections
28import os
29import os.path
30import re
31import struct
32import subprocess
33import sys
34
35
# The four magic bytes found at the very beginning of every ELF file.
_ELF_MAGIC = b'\x7fELF'


# `e_machine` values of the architectures this checker knows about.
_EM_386 = 3
_EM_ARM = 40
_EM_X86_64 = 62
_EM_AARCH64 = 183

_KNOWN_MACHINES = {
  _EM_386,
  _EM_ARM,
  _EM_X86_64,
  _EM_AARCH64,
}


# Layout of the leading ELF header fields as (field name, struct format)
# pairs, listed in file order.
#
# NOTE: the derived format string carries no byte-order prefix, so the
# `struct` module decodes it with the host's native byte order and alignment.
_ELF_HEADER_STRUCT = (
  ('ei_magic', '4s'),
  ('ei_class', 'B'),
  ('ei_data', 'B'),
  ('ei_version', 'B'),
  ('ei_osabi', 'B'),
  ('ei_pad', '8s'),
  ('e_type', 'H'),
  ('e_machine', 'H'),
  ('e_version', 'I'),
)

# Concatenation of the per-field format codes, usable with `struct.unpack`.
_ELF_HEADER_STRUCT_FMT = ''.join(
  field[1] for field in _ELF_HEADER_STRUCT)


# Decoded ELF header; one attribute per entry of `_ELF_HEADER_STRUCT`.
ELFHeader = collections.namedtuple(
  'ELFHeader', tuple(field[0] for field in _ELF_HEADER_STRUCT))


# Parsed view of an ELF file: its soname, its DT_NEEDED entries, the
# imported/exported symbol tables and the decoded `ELFHeader`.
ELF = collections.namedtuple(
  'ELF', 'dt_soname dt_needed imported exported header')
71
72
def _get_os_name():
  """Get the host OS name.

  Returns:
    'linux' or 'darwin'.

  Raises:
    ValueError: If the host platform is neither Linux nor macOS.
  """
  # `sys.platform` is 'linux2' on Python 2 but plain 'linux' on Python 3.3+,
  # so match on the prefix instead of one exact spelling.
  if sys.platform.startswith('linux'):
    return 'linux'
  if sys.platform == 'darwin':
    return 'darwin'
  raise ValueError(sys.platform + ' is not supported')
80
81
def _get_build_top():
  """Locate the root of the source tree ($ANDROID_BUILD_TOP).

  Walks upwards from the current working directory and returns the first
  directory that contains a `.repo` entry, or None when the filesystem root
  is reached without finding one.
  """
  candidate = os.path.abspath(os.getcwd())
  while True:
    if os.path.exists(os.path.join(candidate, '.repo')):
      return candidate
    parent = os.path.dirname(candidate)
    if parent == candidate:
      # dirname() no longer changes the path: we are at the root.
      return None
    candidate = parent
92
93
def _select_latest_llvm_version(versions):
  """Pick the newest LLVM prebuilts version out of `versions`.

  Only names starting with `clang-r<digits>` are considered. Candidates are
  ranked by their numeric revision first and by the full name second, so for
  equal revisions the lexicographically greater name wins. Returns None when
  no name matches.
  """
  version_re = re.compile('clang-r([0-9]+)([a-z]?)')
  ranked = []
  for name in versions:
    matched = version_re.match(name)
    if matched:
      ranked.append((int(matched.group(1)), name))
  if not ranked:
    return None
  # Tuples compare element-wise: revision number, then version string.
  return max(ranked)[1]
109
110
def _get_latest_llvm_version(llvm_dir):
  """Return the newest LLVM prebuilts version among the entries of `llvm_dir`.

  The directory listing is handed to `_select_latest_llvm_version`, whose
  result is returned unchanged.
  """
  available = os.listdir(llvm_dir)
  return _select_latest_llvm_version(available)
114
115
def _get_llvm_dir():
  """Find the path to LLVM prebuilts.

  The directory is `<build top>/<base>/<os>-x86/<version>`, where `<base>`
  comes from $LLVM_PREBUILTS_BASE (default: `prebuilts/clang/host`) and
  `<version>` comes from $LLVM_PREBUILTS_VERSION (default: the latest
  version found in the host directory).

  Returns:
    The path to the selected LLVM prebuilts directory, or None if the build
    top, the host directory, a usable version or the version directory
    cannot be found.
  """
  build_top = _get_build_top()
  if not build_top:
    # Not inside a source tree; os.path.join() cannot take None.
    return None

  llvm_prebuilts_base = os.environ.get('LLVM_PREBUILTS_BASE')
  if not llvm_prebuilts_base:
    llvm_prebuilts_base = os.path.join('prebuilts', 'clang', 'host')

  llvm_dir = os.path.join(
    build_top, llvm_prebuilts_base, _get_os_name() + '-x86')

  if not os.path.exists(llvm_dir):
    return None

  llvm_prebuilts_version = os.environ.get('LLVM_PREBUILTS_VERSION')
  if not llvm_prebuilts_version:
    llvm_prebuilts_version = _get_latest_llvm_version(llvm_dir)
  if not llvm_prebuilts_version:
    # The host directory holds no recognizable version.
    return None

  llvm_dir = os.path.join(llvm_dir, llvm_prebuilts_version)

  if not os.path.exists(llvm_dir):
    return None

  return llvm_dir
140
141
def _get_llvm_readobj():
  """Find the path to llvm-readobj executable.

  Returns:
    The path to `bin/llvm-readobj` inside the LLVM prebuilts directory when
    that file exists; otherwise the bare command name 'llvm-readobj', which
    leaves the lookup to $PATH.
  """
  llvm_dir = _get_llvm_dir()
  if llvm_dir:
    llvm_readobj = os.path.join(llvm_dir, 'bin', 'llvm-readobj')
    if os.path.exists(llvm_readobj):
      return llvm_readobj
  # No prebuilts directory (e.g. run outside a source tree) or no binary in
  # it: fall back to whatever is installed on the host.
  return 'llvm-readobj'
147
148
class ELFError(ValueError):
  """Base class of the errors raised while parsing an ELF file."""
152
153
class ELFInvalidMagicError(ELFError):
  """Raised when a file does not start with the ELF magic word."""

  def __init__(self):
    # The message is fixed; callers construct this error without arguments.
    ELFError.__init__(self, 'bad ELF magic')
158
159
class ELFParser(object):
  """ELF file parser.

  The leading ELF header fields are decoded directly from the file. The
  dynamic table and the dynamic symbol table are obtained by running
  `llvm-readobj` and parsing its text output.
  """

  @classmethod
  def _read_elf_header(cls, elf_file_path):
    """Read the leading ELF header fields from the beginning of the file.

    Returns:
      An `ELFHeader`, or None if the file is too short to hold the header.
    """
    with open(elf_file_path, 'rb') as elf_file:
      buf = elf_file.read(struct.calcsize(_ELF_HEADER_STRUCT_FMT))
    try:
      return ELFHeader(*struct.unpack(_ELF_HEADER_STRUCT_FMT, buf))
    except struct.error:
      # A short read yields a buffer of the wrong size.
      return None


  @classmethod
  def open(cls, elf_file_path, llvm_readobj):
    """Open and parse the ELF file.

    Args:
      elf_file_path: Path to the file to be parsed.
      llvm_readobj: Path (or command name) of the llvm-readobj executable.

    Returns:
      An `ELF` tuple describing the file.

    Raises:
      ELFInvalidMagicError: If the file does not start with the ELF magic.
      subprocess.CalledProcessError: If llvm-readobj exits with an error.
    """
    # Parse the ELF header to check the magic word.
    header = cls._read_elf_header(elf_file_path)
    if not header or header.ei_magic != _ELF_MAGIC:
      raise ELFInvalidMagicError()

    # Run llvm-readobj and parse the output.
    return cls._read_llvm_readobj(elf_file_path, header, llvm_readobj)


  @classmethod
  def _find_prefix(cls, pattern, lines_it):
    """Iterate `lines_it` until finding a string that starts with `pattern`.

    The iterator is consumed up to and including the matching line (or
    entirely, if nothing matches). Returns whether a match was found.
    """
    return any(line.startswith(pattern) for line in lines_it)


  @classmethod
  def _read_llvm_readobj(cls, elf_file_path, header, llvm_readobj):
    """Run llvm-readobj and parse the output.

    Raises:
      subprocess.CalledProcessError: If llvm-readobj returns a non-zero exit
        status. Its stderr is forwarded to this process's stderr first, so
        that the reason for the failure is not lost.
    """
    cmd = [llvm_readobj, '--dynamic-table', '--dyn-symbols', elf_file_path]
    # Text-mode pipes: the output is `str` on both Python 2 and Python 3, so
    # it can be compared against the `str` patterns below. Without this,
    # Python 3 would return `bytes` and every comparison would fail.
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, universal_newlines=True)
    out, err = proc.communicate()
    rc = proc.returncode
    if rc != 0:
      if err:
        sys.stderr.write(err)
      raise subprocess.CalledProcessError(rc, cmd, out)
    lines = out.splitlines()
    return cls._parse_llvm_readobj(elf_file_path, header, lines)


  @classmethod
  def _parse_llvm_readobj(cls, elf_file_path, header, lines):
    """Parse the output of llvm-readobj.

    Both sub-parsers share a single iterator, so the dynamic symbols are
    searched for after the point where the dynamic table parser stopped.
    """
    lines_it = iter(lines)
    dt_soname, dt_needed = cls._parse_dynamic_table(elf_file_path, lines_it)
    imported, exported = cls._parse_dynamic_symbols(lines_it)
    return ELF(dt_soname, dt_needed, imported, exported, header)


  _DYNAMIC_SECTION_START_PATTERN = 'DynamicSection ['

  _DYNAMIC_SECTION_NEEDED_PATTERN = re.compile(
    r'^  0x[0-9a-fA-F]+\s+NEEDED\s+Shared library: \[(.*)\]$')

  _DYNAMIC_SECTION_SONAME_PATTERN = re.compile(
    r'^  0x[0-9a-fA-F]+\s+SONAME\s+Library soname: \[(.*)\]$')

  _DYNAMIC_SECTION_END_PATTERN = ']'


  @classmethod
  def _parse_dynamic_table(cls, elf_file_path, lines_it):
    """Parse the dynamic table section.

    Returns:
      A `(dt_soname, dt_needed)` pair. `dt_soname` defaults to the base name
      of `elf_file_path` when the section has no SONAME entry (or when there
      is no dynamic section at all); `dt_needed` lists the NEEDED entries in
      their order of appearance.
    """
    dt_soname = os.path.basename(elf_file_path)
    dt_needed = []

    if not cls._find_prefix(cls._DYNAMIC_SECTION_START_PATTERN, lines_it):
      return (dt_soname, dt_needed)

    for line in lines_it:
      if line == cls._DYNAMIC_SECTION_END_PATTERN:
        break

      match = cls._DYNAMIC_SECTION_NEEDED_PATTERN.match(line)
      if match:
        dt_needed.append(match.group(1))
        continue

      match = cls._DYNAMIC_SECTION_SONAME_PATTERN.match(line)
      if match:
        dt_soname = match.group(1)

    return (dt_soname, dt_needed)


  _DYNAMIC_SYMBOLS_START_PATTERN = 'DynamicSymbols ['
  _DYNAMIC_SYMBOLS_END_PATTERN = ']'

  _SYMBOL_ENTRY_START_PATTERN = '  Symbol {'
  _SYMBOL_ENTRY_PATTERN = re.compile(r'^    ([A-Za-z0-9_]+): (.*)$')
  # Trailing " (<decimal>)" or " (0x<hex>)" annotation that follows a value.
  _SYMBOL_ENTRY_PAREN_PATTERN = re.compile(
    r'\s+\((?:(?:\d+)|(?:0x[0-9a-fA-F]+))\)$')
  _SYMBOL_ENTRY_END_PATTERN = '  }'


  @staticmethod
  def _parse_symbol_name(name_with_version):
    """Split `name_with_version` into name and version. This function may split
    at last occurrence of `@@` or `@`.

    Returns:
      A `(name, version)` pair; `version` is '' when there is no `@`.
    """
    pos = name_with_version.rfind('@')
    if pos == -1:
      return (name_with_version, '')
    version = name_with_version[pos + 1:]
    # For `name@@version`, drop both `@` characters from the name.
    if pos > 0 and name_with_version[pos - 1] == '@':
      pos -= 1
    return (name_with_version[:pos], version)


  @classmethod
  def _parse_dynamic_symbols(cls, lines_it):
    """Parse dynamic symbol table and collect imported and exported symbols.

    Returns:
      An `(imported, exported)` pair of dicts mapping a symbol name to the
      set of its versions. Undefined non-weak symbols are imported; defined
      non-local symbols are exported. Symbols with an empty name are skipped.
    """
    imported = collections.defaultdict(set)
    exported = collections.defaultdict(set)

    for symbol in cls._parse_dynamic_symbols_internal(lines_it):
      name, version = cls._parse_symbol_name(symbol['Name'])
      if not name:
        continue
      if symbol['Section'] == 'Undefined':
        if symbol['Binding'] != 'Weak':
          imported[name].add(version)
      elif symbol['Binding'] != 'Local':
        exported[name].add(version)

    # Freeze the returned imported/exported dict.
    return (dict(imported), dict(exported))


  @classmethod
  def _parse_dynamic_symbols_internal(cls, lines_it):
    """Parse symbol entries and yield each symbol.

    Each symbol is a dict mapping the attribute names of one `Symbol { ... }`
    entry to their values, with the trailing parenthesized number removed.
    Attribute lines and closing braces found outside of an entry are ignored.
    """

    if not cls._find_prefix(cls._DYNAMIC_SYMBOLS_START_PATTERN, lines_it):
      return

    symbol = None  # The entry being filled in; None while outside an entry.
    for line in lines_it:
      if line == cls._DYNAMIC_SYMBOLS_END_PATTERN:
        return

      if line == cls._SYMBOL_ENTRY_START_PATTERN:
        symbol = {}
        continue

      if line == cls._SYMBOL_ENTRY_END_PATTERN:
        if symbol is not None:
          yield symbol
        symbol = None
        continue

      if symbol is None:
        continue

      match = cls._SYMBOL_ENTRY_PATTERN.match(line)
      if match:
        key = match.group(1)
        value = cls._SYMBOL_ENTRY_PAREN_PATTERN.sub('', match.group(2))
        symbol[key] = value
329
class Checker(object):
  """ELF file checker that checks DT_SONAME, DT_NEEDED, and symbols.

  Every failed check reports the problem on stderr and terminates the
  process with exit status 2.
  """

  def __init__(self, llvm_readobj):
    """Create a checker that parses ELF files with `llvm_readobj`."""
    self._file_path = ''
    self._file_under_test = None
    self._shared_libs = []

    self._llvm_readobj = llvm_readobj


  # Only colorize the tags when stderr is an interactive terminal.
  if sys.stderr.isatty():
    _ERROR_TAG = '\033[0;1;31merror:\033[m'  # Red error
    _NOTE_TAG = '\033[0;1;30mnote:\033[m'  # Black note
  else:
    _ERROR_TAG = 'error:'  # Plain error
    _NOTE_TAG = 'note:'  # Plain note


  @staticmethod
  def _to_text(value):
    """Return `value` as text, decoding it as UTF-8 if it is a byte string.

    Names are byte strings on Python 2 but text on Python 3, where `str` has
    no `decode` method.
    """
    if isinstance(value, bytes):
      return value.decode('utf-8')
    return value


  def _error(self, *args):
    """Emit an error to stderr."""
    print(self._file_path + ': ' + self._ERROR_TAG, *args, file=sys.stderr)


  def _note(self, *args):
    """Emit a note to stderr."""
    print(self._file_path + ': ' + self._NOTE_TAG, *args, file=sys.stderr)


  def _load_elf_file(self, path, skip_bad_elf_magic):
    """Load an ELF file from the `path`.

    Args:
      path: Path to the ELF file.
      skip_bad_elf_magic: If true, a file without the ELF magic word ends the
        process successfully (exit status 0) instead of being an error.

    Returns:
      The parsed `ELF` tuple.
    """
    try:
      return ELFParser.open(path, self._llvm_readobj)
    except (IOError, OSError):
      self._error('Failed to open "{}".'.format(path))
      sys.exit(2)
    except ELFInvalidMagicError:
      if skip_bad_elf_magic:
        sys.exit(0)
      else:
        self._error('File "{}" must have a valid ELF magic word.'.format(path))
        sys.exit(2)
    except Exception:
      # Report which file triggered the failure, then let the original
      # exception propagate with its traceback.
      self._error('An unknown error occurred while opening "{}".'.format(path))
      raise


  def load_file_under_test(self, path, skip_bad_elf_magic,
                           skip_unknown_elf_machine):
    """Load file-under-test (either an executable or a shared lib).

    Args:
      path: Path to the file to be checked.
      skip_bad_elf_magic: See `_load_elf_file`.
      skip_unknown_elf_machine: If true, a file whose `e_machine` is not one
        of the known machines ends the process with exit status 0.
    """
    self._file_path = path
    self._file_under_test = self._load_elf_file(path, skip_bad_elf_magic)

    if skip_unknown_elf_machine and \
        self._file_under_test.header.e_machine not in _KNOWN_MACHINES:
      sys.exit(0)


  def load_shared_libs(self, shared_lib_paths):
    """Load shared libraries.

    Unlike the file under test, a shared library without the ELF magic word
    is always an error.
    """
    for path in shared_lib_paths:
      self._shared_libs.append(self._load_elf_file(path, False))


  def check_dt_soname(self, soname):
    """Check whether DT_SONAME matches installation file name."""
    if self._file_under_test.dt_soname != soname:
      self._error('DT_SONAME "{}" must be equal to the file name "{}".'
                  .format(self._file_under_test.dt_soname, soname))
      sys.exit(2)


  def check_dt_needed(self, system_shared_lib_names):
    """Check whether all DT_NEEDED entries are specified in the build
    system.

    Args:
      system_shared_lib_names: Module names to be left out of the fix
        suggestion.
    """

    missing_shared_libs = False

    # Collect the DT_SONAMEs from shared libs specified in the build system.
    specified_sonames = {lib.dt_soname for lib in self._shared_libs}

    # Check whether all DT_NEEDED entries are specified.
    for lib in self._file_under_test.dt_needed:
      if lib not in specified_sonames:
        self._error('DT_NEEDED "{}" is not specified in shared_libs.'
                    .format(self._to_text(lib)))
        missing_shared_libs = True

    if missing_shared_libs:
      dt_needed = sorted(set(self._file_under_test.dt_needed))
      modules = [re.sub('\\.so$', '', lib) for lib in dt_needed]

      # Remove system shared libraries from the suggestion since they are added
      # by default.
      modules = [name for name in modules
                 if name not in system_shared_lib_names]

      self._note()
      self._note('Fix suggestions:')
      self._note(
        '  Android.bp: shared_libs: [' +
        ', '.join('"' + module + '"' for module in modules) + '],')
      self._note(
        '  Android.mk: LOCAL_SHARED_LIBRARIES := ' + ' '.join(modules))

      self._note()
      self._note('If the fix above doesn\'t work, bypass this check with:')
      self._note('  Android.bp: check_elf_files: false,')
      self._note('  Android.mk: LOCAL_CHECK_ELF_FILES := false')

      sys.exit(2)


  @staticmethod
  def _find_symbol(lib, name, version):
    """Check whether the symbol name and version matches a definition in
    lib."""
    if name not in lib.exported:
      return False
    if version == '':  # Symbol version is not requested
      return True
    return version in lib.exported[name]


  @classmethod
  def _find_symbol_from_libs(cls, libs, name, version):
    """Check whether the symbol name and version is defined in one of the
    shared libraries in libs.

    Returns:
      The first element of `libs` that defines the symbol, or None.
    """
    for lib in libs:
      if cls._find_symbol(lib, name, version):
        return lib
    return None


  def check_symbols(self):
    """Check whether all undefined symbols are resolved to a definition.

    A symbol may be resolved by the file under test itself or by any of the
    loaded shared libraries.
    """
    all_elf_files = [self._file_under_test] + self._shared_libs
    missing_symbols = []
    # `items()` works on both Python 2 and 3; `iteritems()` is Python 2 only.
    for sym, imported_vers in self._file_under_test.imported.items():
      for imported_ver in imported_vers:
        lib = self._find_symbol_from_libs(all_elf_files, sym, imported_ver)
        if not lib:
          missing_symbols.append((sym, imported_ver))

    if missing_symbols:
      for sym, ver in sorted(missing_symbols):
        sym = self._to_text(sym)
        if ver:
          sym += '@' + self._to_text(ver)
        self._error('Unresolved symbol: {}'.format(sym))

      self._note()
      self._note('Some dependencies might be changed, thus the symbol(s) '
                 'above cannot be resolved.')
      self._note('Please re-build the prebuilt file: "{}".'
                 .format(self._file_path))

      self._note()
      self._note('If this is a new prebuilt file and it is designed to have '
                 'unresolved symbols, add one of the following properties:')
      self._note('  Android.bp: allow_undefined_symbols: true,')
      self._note('  Android.mk: LOCAL_ALLOW_UNDEFINED_SYMBOLS := true')

      sys.exit(2)
497
def _parse_args():
  """Build the command line parser and parse `sys.argv` with it.

  Returns the `argparse.Namespace` holding the input file, the optional
  soname, the shared library lists, the check-skipping flags and the
  optional llvm-readobj path.
  """
  parser = argparse.ArgumentParser()
  add = parser.add_argument

  # Input file
  add('file', help='Path to the input file to be checked')
  add('--soname', help='Shared object name of the input file')

  # Repeatable options: shared library dependencies, then the names of the
  # system shared libraries.
  list_options = (
    ('--shared-lib', 'Path to shared library dependencies'),
    ('--system-shared-lib',
     'System shared libraries to be hidden from fix '
     'suggestions'),
  )
  for flag, text in list_options:
    add(flag, action='append', default=[], help=text)

  # Check options
  switches = (
    ('--skip-bad-elf-magic',
     'Ignore the input file without the ELF magic word'),
    ('--skip-unknown-elf-machine',
     'Ignore the input file with unknown machine ID'),
    ('--allow-undefined-symbols',
     'Ignore unresolved undefined symbols'),
  )
  for flag, text in switches:
    add(flag, action='store_true', help=text)

  # Other options
  add('--llvm-readobj', help='Path to the llvm-readobj executable')

  return parser.parse_args()
530
531
def main():
  """Entry point: parse the options, load the ELF files and run the checks."""
  options = _parse_args()

  # Use the llvm-readobj given on the command line, or look one up.
  readobj = options.llvm_readobj or _get_llvm_readobj()

  # Load ELF files
  checker = Checker(readobj)
  checker.load_file_under_test(
    options.file,
    options.skip_bad_elf_magic,
    options.skip_unknown_elf_machine)
  checker.load_shared_libs(options.shared_lib)

  # Run checks
  if options.soname:
    checker.check_dt_soname(options.soname)

  checker.check_dt_needed(options.system_shared_lib)

  if options.allow_undefined_symbols:
    return
  checker.check_symbols()
554
555
556if __name__ == '__main__':
557  main()
558