# Copyright 2021 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""A symbolizer based on llvm-symbolizer."""

import shutil
import subprocess
import threading
import json
from typing import IO, Optional, Tuple
from pathlib import Path
from pw_symbolizer import symbolizer


class LlvmSymbolizer(symbolizer.Symbolizer):
    """A symbolizer that wraps llvm-symbolizer.

    When constructed with a binary, a long-lived llvm-symbolizer subprocess is
    started and addresses are streamed to it over stdin, with results read back
    from stdout. When constructed without a binary, symbolize() returns blank
    symbols.
    """
    def __init__(self,
                 binary: Optional[Path] = None,
                 force_legacy: bool = False) -> None:
        """Starts llvm-symbolizer for the given binary, if one is provided.

        Args:
          binary: Path of the executable passed to llvm-symbolizer via --exe.
            If None, no subprocess is started.
          force_legacy: If True, always use the LLVM text output style rather
            than probing for JSON output support.

        Raises:
          FileNotFoundError: llvm-symbolizer is not on the PATH, or binary does
            not exist.
        """
        # Lets destructor return cleanly if the binary is not found.
        self._symbolizer: Optional[subprocess.Popen] = None
        # Serializes the write-request/read-response exchange in symbolize().
        self._lock: threading.Lock = threading.Lock()

        if shutil.which('llvm-symbolizer') is None:
            raise FileNotFoundError(
                'llvm-symbolizer not installed. Run bootstrap, or download '
                'LLVM (https://github.com/llvm/llvm-project/releases/) and add '
                'the tools to your system PATH')

        # Prefer JSON output as it's easier to decode.
        if force_legacy:
            self._json_mode = False
        else:
            self._json_mode = LlvmSymbolizer._is_json_compatibile()

        if binary is not None:
            if not binary.exists():
                raise FileNotFoundError(binary)

            output_style = 'JSON' if self._json_mode else 'LLVM'
            cmd = [
                'llvm-symbolizer',
                '--no-inlines',
                '--demangle',
                '--functions',
                f'--output-style={output_style}',
                '--exe',
                str(binary),
            ]
            self._symbolizer = subprocess.Popen(cmd,
                                                stdout=subprocess.PIPE,
                                                stdin=subprocess.PIPE)

    def __del__(self):
        """Stops the llvm-symbolizer subprocess, if one was started."""
        # getattr() keeps this safe even if __init__ never ran to the point
        # of assigning the attribute.
        process = getattr(self, '_symbolizer', None)
        if process is None:
            return

        process.terminate()

        # Close our ends of the pipes so the file descriptors are released.
        for pipe in (process.stdin, process.stdout):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                # Best-effort cleanup: closing stdin flushes it, which can
                # fail if the child is already gone.
                pass

        # Reap the child so it does not linger as a zombie process.
        process.wait()

    @staticmethod
    def _is_json_compatibile() -> bool:
        """Checks llvm-symbolizer to ensure compatibility.

        Returns:
          True if a line of `llvm-symbolizer --help` output mentions both
          '--output-style' and 'JSON'.
        """
        result = subprocess.run(('llvm-symbolizer', '--help'),
                                stdout=subprocess.PIPE,
                                stdin=subprocess.PIPE)
        return any('--output-style' in line and 'JSON' in line
                   for line in result.stdout.decode().splitlines())

    @staticmethod
    def _read_json_symbol(address: int,
                          stdout: IO[bytes]) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's JSON output mode.

        Args:
          address: The address that was requested; stored in the result.
          stdout: Byte stream connected to llvm-symbolizer's stdout.

        Returns:
          A Symbol built from the first entry of the response's "Symbol" list.

        Raises:
          ValueError: The stream hit end-of-file, the line was not valid JSON,
            or the response contained no symbols.
        """
        response_line = stdout.readline()
        if not response_line:
            # readline() only returns an empty result at end-of-file.
            raise ValueError('llvm-symbolizer closed unexpectedly')

        results = json.loads(response_line.decode())

        # The symbol resolution should give us at least one symbol, even
        # if it's largely empty. This is checked explicitly rather than with
        # an assert so the check is not stripped when running with -O.
        symbols = results.get('Symbol')
        if not symbols:
            raise ValueError(
                f'llvm-symbolizer returned no symbols for 0x{address:08X}')

        # Get the first symbol.
        symbol = symbols[0]

        return symbolizer.Symbol(address=address,
                                 name=symbol['FunctionName'],
                                 file=symbol['FileName'],
                                 line=symbol['Line'])

    @staticmethod
    def _llvm_output_line_splitter(file_and_line: str) -> Tuple[str, int]:
        """Splits an LLVM-style source location into file name and line.

        Args:
          file_and_line: A location string such as 'path/to/src.c:123:1'.

        Returns:
          A (file, line) tuple, or ('', 0) if the file name starts with '?'.

        Raises:
          ValueError: The string has no ':' separator, or the line number
            field is empty or not an integer.
        """
        split = file_and_line.split(':')
        # LLVM file name output is as follows:
        #   path/to/src.c:123:1
        # Where the last number is the discriminator, the second to last the
        # line number, and all leading characters the file name. For now,
        # this class ignores discriminators.
        if len(split) < 2:
            # Without this check, split[-2] would raise a bare IndexError.
            raise ValueError(f'Bad symbol format: {file_and_line}')

        line_number_str = split[-2]
        # Re-join the leading fields since the file name may itself contain
        # ':' characters.
        file = ':'.join(split[:-2])

        if not line_number_str:
            raise ValueError(f'Bad symbol format: {file_and_line}')

        # For unknown file names, mark as blank.
        if file.startswith('?'):
            return ('', 0)

        try:
            line_number = int(line_number_str)
        except ValueError as err:
            raise ValueError(f'Bad symbol format: {file_and_line}') from err

        return (file, line_number)

    @staticmethod
    def _read_llvm_symbol(address: int,
                          stdout: IO[bytes]) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's LLVM output mode.

        Args:
          address: The address that was requested; stored in the result.
          stdout: Byte stream connected to llvm-symbolizer's stdout.

        Returns:
          A Symbol holding only the address if the function name starts with
          '?', otherwise a Symbol with the name, file, and line that were read.

        Raises:
          ValueError: The stream hit end-of-file before any output was read,
            or the file/line record was malformed.
        """
        symbol_line = stdout.readline()
        if not symbol_line:
            # readline() only returns an empty result at end-of-file.
            raise ValueError('llvm-symbolizer closed unexpectedly')

        symbol = symbol_line.decode().strip()
        file_and_line = stdout.readline().decode().strip()

        # Might have gotten multiple symbol matches, drop all of the other ones.
        # The results of a symbol are denoted by an empty newline.
        while True:
            extra_line = stdout.readline()
            # Stop on end-of-file as well as on the blank separator line;
            # otherwise a closed pipe would make this loop spin forever.
            # Stripping also accepts a '\r\n' blank line.
            if not extra_line or not extra_line.strip():
                break

        if symbol.startswith('?'):
            return symbolizer.Symbol(address)

        file, line_number = LlvmSymbolizer._llvm_output_line_splitter(
            file_and_line)

        return symbolizer.Symbol(address, symbol, file, line_number)

    def symbolize(self, address: int) -> symbolizer.Symbol:
        """Symbolizes an address using the loaded ELF file.

        Args:
          address: The address to look up.

        Returns:
          The Symbol for the address. If no binary was provided at
          construction, a Symbol with an empty name and file and line 0.

        Raises:
          ValueError: The llvm-symbolizer subprocess has exited, or its output
            could not be parsed.
        """
        if not self._symbolizer:
            return symbolizer.Symbol(address=address, name='', file='', line=0)

        with self._lock:
            # returncode is only refreshed by poll()/wait(), so poll() must be
            # called here to actually notice that the child has exited.
            if self._symbolizer.poll() is not None:
                raise ValueError('llvm-symbolizer closed unexpectedly')

            stdin = self._symbolizer.stdin
            stdout = self._symbolizer.stdout

            assert stdin is not None
            assert stdout is not None

            stdin.write(f'0x{address:08X}\n'.encode())
            stdin.flush()

            if self._json_mode:
                return LlvmSymbolizer._read_json_symbol(address, stdout)

            return LlvmSymbolizer._read_llvm_symbol(address, stdout)