# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
#===----------------------------------------------------------------------===##
#
#                     The LLVM Compiler Infrastructure
#
# This file is dual licensed under the MIT and the University of Illinois Open
# Source Licenses. See LICENSE.TXT for details.
#
#===----------------------------------------------------------------------===##
"""
extract - A set of functions that extract symbol lists from shared libraries.
"""
import distutils.spawn
import sys
import re

import libcxx.util
from libcxx.sym_check import util

# Symbol names that are dropped by both extractors.
extract_ignore_names = ['_init', '_fini']


class NMExtractor(object):
    """
    NMExtractor - Extract symbol lists from libraries using nm.
    """

    @staticmethod
    def find_tool():
        """
        Search for the nm executable and return the path, or None if it
        cannot be found.
        """
        return distutils.spawn.find_executable('nm')

    def __init__(self):
        """
        Initialize the nm executable and flags that will be used to extract
        symbols from shared libraries. Exits the process with status 1 if nm
        cannot be found.
        """
        self.nm_exe = self.find_tool()
        if self.nm_exe is None:
            # ERROR no NM found
            print("ERROR: Could not find nm")
            sys.exit(1)
        self.flags = ['-P', '-g']

    def extract(self, lib):
        """
        Run nm on 'lib', parse and filter its output and return the result
        of util.read_syms_from_list() applied to the unique, sorted symbols.

        Raises RuntimeError if nm exits with a non-zero status.
        """
        cmd = [self.nm_exe] + self.flags + [lib]
        out, _, exit_code = libcxx.util.executeCommandVerbose(cmd)
        if exit_code != 0:
            raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
        fmt_syms = (self._extract_sym(l)
                    for l in out.splitlines() if l.strip())
        # Cast symbol to string. Dicts are not hashable, so the repr is what
        # gets de-duplicated and sorted below.
        final_syms = (repr(s) for s in fmt_syms if self._want_sym(s))
        # Make unique and sort strings.
        tmp_list = sorted(set(final_syms))
        # Cast string back to symbol.
        return util.read_syms_from_list(tmp_list)

    def _extract_sym(self, sym_str):
        """
        Parse one whitespace separated line of nm output into a symbol dict
        with the keys 'name', 'type', 'is_defined' and, for OBJECT symbols
        with at least four columns, 'size'. Returns None for lines with
        fewer than two columns.
        """
        bits = sym_str.split()
        # Everything we want has at least two columns.
        if len(bits) < 2:
            return None
        new_sym = {
            'name': bits[0],
            'type': bits[1],
            'is_defined': (bits[1].lower() != 'u')
        }
        new_sym['name'] = new_sym['name'].replace('@@', '@')
        new_sym = self._transform_sym_type(new_sym)
        # NM types which we want to save the size for. The fourth column is
        # parsed as a hexadecimal number.
        if new_sym['type'] == 'OBJECT' and len(bits) > 3:
            new_sym['size'] = int(bits[3], 16)
        return new_sym

    @staticmethod
    def _want_sym(sym):
        """
        Check that sym is a valid symbol that we want to keep.
        """
        if sym is None or len(sym) < 2:
            return False
        if sym['name'] in extract_ignore_names:
            return False
        # Lower case type letters were left untouched by
        # _transform_sym_type(); symbols carrying them are dropped.
        bad_types = ['t', 'b', 'r', 'd', 'w']
        return (sym['type'] not in bad_types
                and sym['name'] not in ['__bss_start', '_end', '_edata'])

    @staticmethod
    def _transform_sym_type(sym):
        """
        Map the nm single letter output for type to either FUNC or OBJECT.
        If the type is not recognized it is left unchanged. The dict is
        modified in place and also returned.
        """
        func_types = ['T', 'W']
        obj_types = ['B', 'D', 'R', 'V', 'S']
        if sym['type'] in func_types:
            sym['type'] = 'FUNC'
        elif sym['type'] in obj_types:
            sym['type'] = 'OBJECT'
        return sym


class ReadElfExtractor(object):
    """
    ReadElfExtractor - Extract symbol lists from libraries using readelf.
    """

    @staticmethod
    def find_tool():
        """
        Search for the readelf executable and return the path, or None if
        it cannot be found.
        """
        return distutils.spawn.find_executable('readelf')

    def __init__(self):
        """
        Initialize the readelf executable and flags that will be used to
        extract symbols from shared libraries. Exits the process with
        status 1 if readelf cannot be found.
        """
        self.tool = self.find_tool()
        if self.tool is None:
            # ERROR no readelf found
            print("ERROR: Could not find readelf")
            sys.exit(1)
        self.flags = ['--wide', '--symbols']

    def extract(self, lib):
        """
        Extract symbols from a library and return the results as a list of
        parsed symbol dicts.

        Raises RuntimeError if readelf exits with a non-zero status.
        """
        cmd = [self.tool] + self.flags + [lib]
        out, _, exit_code = libcxx.util.executeCommandVerbose(cmd)
        if exit_code != 0:
            # This class stores the executable in 'tool'; it has no
            # 'nm_exe' attribute.
            raise RuntimeError('Failed to run %s on %s' % (self.tool, lib))
        dyn_syms = self.get_dynsym_table(out)
        return self.process_syms(dyn_syms)

    def process_syms(self, sym_list):
        """
        Convert the rows of a readelf symbol table into symbol dicts.

        Each non-blank row must split into 7, 8 or 9 whitespace separated
        fields. Rows with 7 fields carry no name and are skipped, as are
        NOTYPE symbols and names listed in extract_ignore_names. The size
        is kept for OBJECT symbols only.
        """
        new_syms = []
        for s in sym_list:
            parts = s.split()
            if not parts:
                continue
            assert len(parts) in (7, 8, 9)
            if len(parts) == 7:
                continue
            new_sym = {
                'name': parts[7],
                'size': int(parts[2]),
                'type': parts[3],
                'is_defined': (parts[6] != 'UND')
            }
            assert new_sym['type'] in ['OBJECT', 'FUNC', 'NOTYPE']
            if new_sym['name'] in extract_ignore_names:
                continue
            if new_sym['type'] == 'NOTYPE':
                continue
            if new_sym['type'] == 'FUNC':
                del new_sym['size']
            new_syms.append(new_sym)
        return new_syms

    def get_dynsym_table(self, out):
        """
        Return the lines of readelf output that make up the '.dynsym'
        table: everything from two lines after the table's title line up
        to and including the first blank line that follows it, or to the
        end of the output if there is no such blank line.
        """
        lines = out.splitlines()
        start = -1
        end = -1
        for i, line in enumerate(lines):
            if line.startswith("Symbol table '.dynsym'"):
                # Skip the title line and the line after it.
                start = i + 2
            if start != -1 and end == -1 and not line.strip():
                end = i + 1
        assert start != -1
        if end == -1:
            end = len(lines)
        return lines[start:end]


def extract_symbols(lib_file):
    """
    Extract and return a list of symbols extracted from a dynamic library.
    The symbols are extracted using readelf when it is available and nm
    otherwise. They are then filtered and formatted.
    """
    if ReadElfExtractor.find_tool():
        extractor = ReadElfExtractor()
    else:
        extractor = NMExtractor()
    return extractor.extract(lib_file)