• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
2#===----------------------------------------------------------------------===##
3#
4#                     The LLVM Compiler Infrastructure
5#
6# This file is dual licensed under the MIT and the University of Illinois Open
7# Source Licenses. See LICENSE.TXT for details.
8#
9#===----------------------------------------------------------------------===##
10"""
extract - A set of functions that extract symbol lists from shared libraries.
12"""
13import distutils.spawn
14import sys
15import re
16
17import libcxx.util
18from libcxx.sym_check import util
19
# Symbol names that the extractors in this module always drop from their
# results.
extract_ignore_names = [
    '_init',
    '_fini',
]
21
class NMExtractor(object):
    """
    NMExtractor - Build symbol lists for libraries from the output of nm.
    """

    @staticmethod
    def find_tool():
        """
        Look up the nm executable and return the result of the search
        (None when it could not be located).
        """
        return distutils.spawn.find_executable('nm')

    def __init__(self):
        """
        Record the nm executable and the command line flags used whenever
        symbols are extracted. Prints an error and exits the process with
        status 1 if nm is unavailable.
        """
        self.nm_exe = self.find_tool()
        nm_missing = self.nm_exe is None
        if nm_missing:
            print("ERROR: Could not find nm")
            sys.exit(1)
        self.flags = ['-P', '-g']

    def extract(self, lib):
        """
        Run nm on lib and return the de-duplicated, sorted symbols as
        produced by util.read_syms_from_list.

        Raises RuntimeError if nm exits with a non-zero status.
        """
        command = [self.nm_exe]
        command.extend(self.flags)
        command.append(lib)
        stdout, _, status = libcxx.util.executeCommandVerbose(command)
        if status != 0:
            raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
        # Symbols are dicts, which cannot be stored in a set; collect their
        # repr() strings instead so duplicates collapse.
        unique_reprs = set()
        for line in stdout.splitlines():
            if not line.strip():
                continue
            parsed = self._extract_sym(line)
            if self._want_sym(parsed):
                unique_reprs.add(repr(parsed))
        # Sort the strings, then hand them back to be turned into symbols.
        return util.read_syms_from_list(sorted(unique_reprs))

    def _extract_sym(self, sym_str):
        """
        Parse one line of nm output into a symbol dict, or return None when
        the line has fewer than two whitespace separated columns.
        """
        columns = sym_str.split()
        if len(columns) < 2:
            return None
        raw_name, letter = columns[0], columns[1]
        parsed = self._transform_sym_type({
            'name': raw_name.replace('@@', '@'),
            'type': letter,
            'is_defined': letter.lower() != 'u',
        })
        # Only OBJECT symbols carry a size; the fourth column is read as
        # hexadecimal.
        if len(columns) > 3 and parsed['type'] == 'OBJECT':
            parsed['size'] = int(columns[3], 16)
        return parsed

    @staticmethod
    def _want_sym(sym):
        """
        Return True if sym is a parsed symbol that should be kept.
        """
        if sym is None or len(sym) < 2:
            return False
        name = sym['name']
        if name in extract_ignore_names:
            return False
        if sym['type'] in ('t', 'b', 'r', 'd', 'w'):
            return False
        return name not in ('__bss_start', '_end', '_edata')

    @staticmethod
    def _transform_sym_type(sym):
        """
        Rewrite sym['type'] from nm's single letter code to 'FUNC' or
        'OBJECT'. Unrecognized codes are left as they are. The same dict is
        modified in place and returned.
        """
        letter = sym['type']
        if letter in ('T', 'W'):
            sym['type'] = 'FUNC'
        elif letter in ('B', 'D', 'R', 'V', 'S'):
            sym['type'] = 'OBJECT'
        return sym
107
class ReadElfExtractor(object):
    """
    ReadElfExtractor - Extract symbol lists from libraries using readelf.
    """

    @staticmethod
    def find_tool():
        """
        Search for the readelf executable and return the path (None when it
        could not be located).
        """
        return distutils.spawn.find_executable('readelf')

    def __init__(self):
        """
        Initialize the readelf executable and flags that will be used to
        extract symbols from shared libraries. Prints an error and exits the
        process with status 1 if readelf is unavailable.
        """
        self.tool = self.find_tool()
        if self.tool is None:
            # ERROR no readelf found
            print("ERROR: Could not find readelf")
            sys.exit(1)
        self.flags = ['--wide', '--symbols']

    def extract(self, lib):
        """
        Extract symbols from a library and return the results as a list of
        parsed symbol dicts (see process_syms).

        Raises RuntimeError if readelf exits with a non-zero status.
        """
        cmd = [self.tool] + self.flags + [lib]
        out, _, exit_code = libcxx.util.executeCommandVerbose(cmd)
        if exit_code != 0:
            # The executable is stored as self.tool on this class; there is
            # no nm_exe attribute here.
            raise RuntimeError('Failed to run %s on %s' % (self.tool, lib))
        dyn_syms = self.get_dynsym_table(out)
        return self.process_syms(dyn_syms)

    @staticmethod
    def _parse_size(size_str):
        """
        Convert the Size column of a symbol row to an int. Plain decimal
        and 0x-prefixed hexadecimal values are both accepted.
        """
        if size_str.lower().startswith('0x'):
            return int(size_str, 16)
        return int(size_str)

    def process_syms(self, sym_list):
        """
        Turn the rows of a readelf symbol table into a list of symbol dicts.

        Each row is split on whitespace; the columns used are index 2
        (size), 3 (type), 6 (section index, 'UND' meaning undefined) and 7
        (name). Blank rows, rows without a name column, names listed in
        extract_ignore_names and NOTYPE symbols are skipped. FUNC symbols
        are returned without a 'size' key.
        """
        new_syms = []
        for s in sym_list:
            parts = s.split()
            if not parts:
                continue
            assert len(parts) in (7, 8, 9), \
                'unexpected readelf symbol row: %r' % (s,)
            # Seven columns means the name column is empty.
            if len(parts) == 7:
                continue
            new_sym = {
                'name': parts[7],
                'size': self._parse_size(parts[2]),
                'type': parts[3],
                'is_defined': (parts[6] != 'UND')
            }
            assert new_sym['type'] in ['OBJECT', 'FUNC', 'NOTYPE'], \
                'unexpected symbol type: %r' % (new_sym['type'],)
            if new_sym['name'] in extract_ignore_names:
                continue
            if new_sym['type'] == 'NOTYPE':
                continue
            if new_sym['type'] == 'FUNC':
                del new_sym['size']
            new_syms.append(new_sym)
        return new_syms

    def get_dynsym_table(self, out):
        """
        Return the lines of readelf output that belong to the '.dynsym'
        symbol table.

        The slice starts two lines after the "Symbol table '.dynsym'" header
        (skipping the header and the column titles) and runs through the
        first blank line seen after the table was found, or to the end of
        the output when there is no such blank line.
        """
        lines = out.splitlines()
        start = -1
        end = -1
        for i, line in enumerate(lines):
            if line.startswith("Symbol table '.dynsym'"):
                start = i + 2
            if start != -1 and end == -1 and not line.strip():
                end = i + 1
        assert start != -1, "no '.dynsym' symbol table in readelf output"
        if end == -1:
            end = len(lines)
        return lines[start:end]
182
183
def extract_symbols(lib_file):
    """
    Return the symbols extracted from the dynamic library lib_file.

    readelf is used when ReadElfExtractor.find_tool() locates it; otherwise
    extraction falls back to nm through NMExtractor. The result is whatever
    the chosen extractor's extract() method returns.
    """
    if ReadElfExtractor.find_tool():
        extractor_type = ReadElfExtractor
    else:
        extractor_type = NMExtractor
    return extractor_type().extract(lib_file)
195