# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##
"""
extract - A set of functions that extract symbol lists from shared libraries.
"""

import os.path
from os import environ
import re
import shutil
import subprocess
import sys

from libcxx.sym_check import util

# Symbol names that every extractor in this module drops from its results.
extract_ignore_names = ["_init", "_fini"]


class NMExtractor(object):
    """
    NMExtractor - Extract symbol lists from libraries using nm.
    """

    @staticmethod
    def find_tool():
        """
        Search for the nm executable and return the path, or None when it is
        not found on PATH.
        """
        return shutil.which("nm")

    def __init__(self, static_lib):
        """
        Initialize the nm executable and flags that will be used to extract
        symbols from shared libraries.

        Prints an error and exits the process with status 1 when nm cannot be
        found. `static_lib` is stored on the instance unchanged.
        """
        self.nm_exe = self.find_tool()
        if self.nm_exe is None:
            # ERROR no NM found
            print("ERROR: Could not find nm")
            sys.exit(1)
        self.static_lib = static_lib
        self.flags = ["-P", "-g"]
        if sys.platform.startswith("aix"):
            # AIX nm demangles symbols by default, so suppress that.
            self.flags.append("-C")

    def extract(self, lib):
        """
        Run nm on `lib`, parse every non-blank output line, drop unwanted
        symbols, de-duplicate and sort them, and return whatever
        util.read_syms_from_list produces for the resulting list of symbol
        reprs.

        Raises subprocess.CalledProcessError when nm exits with a non-zero
        status.
        """
        cmd = [self.nm_exe] + self.flags + [lib]
        out = subprocess.check_output(cmd).decode()
        fmt_syms = (
            self._extract_sym(line) for line in out.splitlines() if line.strip()
        )
        # Cast symbol to string.
        final_syms = (repr(s) for s in fmt_syms if self._want_sym(s))
        # Make unique and sort strings.
        tmp_list = sorted(set(final_syms))
        # Cast string back to symbol.
        return util.read_syms_from_list(tmp_list)

    def _extract_sym(self, sym_str):
        """
        Parse one whitespace-separated nm output line into a symbol dict with
        the keys "name", "type", "is_defined" and, for OBJECT symbols that
        carry a fourth column, "size". Returns None for lines with fewer than
        two columns.
        """
        bits = sym_str.split()
        # Everything we want has at least two columns.
        if len(bits) < 2:
            return None
        new_sym = {
            "name": bits[0],
            "type": bits[1],
            "is_defined": (bits[1].lower() != "u"),
        }
        new_sym["name"] = new_sym["name"].replace("@@", "@")
        new_sym = self._transform_sym_type(new_sym)
        # NM types which we want to save the size for.
        if new_sym["type"] == "OBJECT" and len(bits) > 3:
            # The fourth column is parsed as a hexadecimal size.
            new_sym["size"] = int(bits[3], 16)
        return new_sym

    @staticmethod
    def _want_sym(sym):
        """
        Check that sym is a valid symbol that we want to keep.

        Rejects None, dicts with fewer than two entries, names listed in
        extract_ignore_names, the lowercase single-letter types listed in
        bad_types below, and the names "__bss_start", "_end" and "_edata".
        """
        if sym is None or len(sym) < 2:
            return False
        if sym["name"] in extract_ignore_names:
            return False
        bad_types = ["t", "b", "r", "d", "w"]
        return sym["type"] not in bad_types and sym["name"] not in [
            "__bss_start",
            "_end",
            "_edata",
        ]

    @staticmethod
    def _transform_sym_type(sym):
        """
        Map the nm single letter output for type to either FUNC or OBJECT.
        If the type is not recognized it is left unchanged. The dict is
        modified in place and also returned.
        """
        func_types = ["T", "W"]
        obj_types = ["B", "D", "R", "V", "S"]
        if sym["type"] in func_types:
            sym["type"] = "FUNC"
        elif sym["type"] in obj_types:
            sym["type"] = "OBJECT"
        return sym


class ReadElfExtractor(object):
    """
    ReadElfExtractor - Extract symbol lists from libraries using readelf.
    """

    @staticmethod
    def find_tool():
        """
        Search for the readelf executable and return the path, or None when
        it is not found on PATH.
        """
        return shutil.which("readelf")

    def __init__(self, static_lib):
        """
        Initialize the readelf executable and flags that will be used to
        extract symbols from shared libraries.

        Prints an error and exits the process with status 1 when readelf
        cannot be found. Asserts that `static_lib` is falsy, because static
        libraries are not supported by this extractor.
        """
        self.tool = self.find_tool()
        if self.tool is None:
            # ERROR no readelf found
            print("ERROR: Could not find readelf")
            sys.exit(1)
        # TODO: Support readelf for reading symbols from archives
        # The message is the second operand of `assert` so that it is
        # actually reported on failure instead of being and-ed into the
        # condition.
        assert not static_lib, "ReadElf does not yet support static libs"
        self.flags = ["--wide", "--symbols"]

    def extract(self, lib):
        """
        Extract symbols from a library and return the results as a list of
        parsed symbol dicts.

        Raises subprocess.CalledProcessError when readelf exits with a
        non-zero status.
        """
        cmd = [self.tool] + self.flags + [lib]
        out = subprocess.check_output(cmd).decode()
        dyn_syms = self.get_dynsym_table(out)
        return self.process_syms(dyn_syms)

    def process_syms(self, sym_list):
        """
        Convert the rows of the .dynsym table into symbol dicts.

        Each non-blank row must split into 7, 8 or 9 columns; 7-column rows
        are skipped. Rows whose name is in extract_ignore_names or whose
        type is NOTYPE are dropped, and FUNC symbols do not carry a "size"
        entry.
        """
        new_syms = []
        for s in sym_list:
            parts = s.split()
            if not parts:
                continue
            assert len(parts) in (7, 8, 9)
            if len(parts) == 7:
                # No column 7 to read a name from.
                continue
            new_sym = {
                "name": parts[7],
                "size": int(parts[2]),
                "type": parts[3],
                "is_defined": (parts[6] != "UND"),
            }
            assert new_sym["type"] in ["OBJECT", "FUNC", "NOTYPE", "TLS"]
            if new_sym["name"] in extract_ignore_names:
                continue
            if new_sym["type"] == "NOTYPE":
                continue
            if new_sym["type"] == "FUNC":
                del new_sym["size"]
            new_syms.append(new_sym)
        return new_syms

    def get_dynsym_table(self, out):
        """
        Return the lines of readelf output that belong to the .dynsym symbol
        table. Asserts that such a table is present.
        """
        lines = out.splitlines()
        start = -1
        end = -1
        for i, line in enumerate(lines):
            # Accept both GNU and ELF Tool Chain readelf format. Some versions
            # of ELF Tool Chain readelf use ( ) around the symbol table name
            # instead of ' ', and omit the blank line before the heading.
            if re.match(r"Symbol table ['(].dynsym[')]", line):
                # Skip the table title and the line after it (presumably the
                # column header row).
                start = i + 2
            elif start != -1 and end == -1:
                if not line.strip():
                    end = i + 1
                if line.startswith("Symbol table ("):
                    end = i
        assert start != -1, "no .dynsym symbol table in readelf output"
        if end == -1:
            end = len(lines)
        return lines[start:end]


class AIXDumpExtractor(object):
    """
    AIXDumpExtractor - Extract symbol lists from libraries using AIX dump.
    """

    @staticmethod
    def find_tool():
        """
        Search for the dump executable and return the path, or None when it
        is not found on PATH.
        """
        return shutil.which("dump")

    def __init__(self, static_lib):
        """
        Initialize the dump executable and flags that will be used to extract
        symbols from shared libraries.

        Asserts that `static_lib` is falsy, then prints an error and exits
        the process with status 1 when dump cannot be found. The OBJECT_MODE
        environment variable selects the -X32 or -X64 flag; any other value
        (or no value) selects -X32_64.
        """
        # TODO: Support dump for reading symbols from static libraries
        # The message is the second operand of `assert` so that it is
        # actually reported on failure instead of being and-ed into the
        # condition.
        assert not static_lib, "static libs not yet supported with dump"
        self.tool = self.find_tool()
        if self.tool is None:
            print("ERROR: Could not find dump")
            sys.exit(1)
        self.flags = ["-n", "-v"]
        object_mode = environ.get("OBJECT_MODE")
        if object_mode == "32":
            self.flags += ["-X32"]
        elif object_mode == "64":
            self.flags += ["-X64"]
        else:
            self.flags += ["-X32_64"]

    def extract(self, lib):
        """
        Extract symbols from a library and return the results as a list of
        parsed symbol dicts.

        Raises subprocess.CalledProcessError when dump exits with a non-zero
        status.
        """
        cmd = [self.tool] + self.flags + [lib]
        out = subprocess.check_output(cmd).decode()
        loader_syms = self.get_loader_symbol_table(out)
        return self.process_syms(loader_syms)

    def process_syms(self, sym_list):
        """
        Convert loader symbol table rows into symbol dicts.

        Each non-blank row must split into 7 or 8 columns; 7-column rows are
        skipped. A symbol is a FUNC when column 4 is "DS" and an OBJECT
        otherwise. Names in extract_ignore_names are dropped.
        """
        new_syms = []
        for s in sym_list:
            parts = s.split()
            if not parts:
                continue
            assert len(parts) in (7, 8)
            if len(parts) == 7:
                # No column 7 to read a name from.
                continue
            new_sym = {
                "name": parts[7],
                "type": "FUNC" if parts[4] == "DS" else "OBJECT",
                "is_defined": (parts[5] != "EXTref"),
                "storage_mapping_class": parts[4],
                "import_export": parts[3],
            }
            if new_sym["name"] in extract_ignore_names:
                continue
            new_syms.append(new_sym)
        return new_syms

    def get_loader_symbol_table(self, out):
        """
        Return the lines of dump output that start with a bracketed decimal
        index such as "[12]".

        A list is returned (rather than a one-shot iterator) so the result
        can be inspected or iterated more than once.
        """
        lines = out.splitlines()
        return [n for n in lines if re.match(r"^\[[0-9]+\]", n)]

    @staticmethod
    def is_shared_lib(lib):
        """
        Check for the shared object flag in XCOFF headers of the input file or
        library archive.

        Returns True when the dump output contains "SHROBJ". Prints an error
        and exits the process with status 1 when dump cannot be found.
        """
        dump = AIXDumpExtractor.find_tool()
        if dump is None:
            print("ERROR: Could not find dump")
            sys.exit(1)
        cmd = [dump, "-X32_64", "-ov", lib]
        out = subprocess.check_output(cmd).decode()
        return out.find("SHROBJ") != -1


def is_static_library(lib_file):
    """
    Determine if a given library is static or shared.

    On AIX the answer comes from AIXDumpExtractor.is_shared_lib; elsewhere a
    library is considered static exactly when its file extension is ".a".
    """
    if sys.platform.startswith("aix"):
        # An AIX library could be both, but for simplicity assume it isn't.
        return not AIXDumpExtractor.is_shared_lib(lib_file)
    else:
        _, ext = os.path.splitext(lib_file)
        return ext == ".a"


def extract_symbols(lib_file, static_lib=None):
    """
    Extract and return a list of symbols extracted from a static or dynamic
    library. The symbols are extracted using dump, nm or readelf. They are
    then filtered and formatted. Finally the symbols are made unique.

    When `static_lib` is None it is determined with is_static_library. dump
    is used on AIX, readelf when it is available and the library is not
    static, and nm otherwise.
    """
    if static_lib is None:
        static_lib = is_static_library(lib_file)
    if sys.platform.startswith("aix"):
        extractor = AIXDumpExtractor(static_lib=static_lib)
    elif ReadElfExtractor.find_tool() and not static_lib:
        extractor = ReadElfExtractor(static_lib=static_lib)
    else:
        extractor = NMExtractor(static_lib=static_lib)
    return extractor.extract(lib_file)