# Copyright 2021 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""A symbolizer based on llvm-symbolizer."""

import json
import shutil
import subprocess
import threading
from pathlib import Path

from pw_symbolizer import symbolizer


class LlvmSymbolizer(symbolizer.Symbolizer):
    """A symbolizer that wraps llvm-symbolizer.

    When a binary is supplied, one long-lived llvm-symbolizer subprocess is
    started and addresses are written to its stdin one at a time. Without a
    binary no subprocess is started and every lookup returns an empty symbol.
    """

    def __init__(
        self,
        binary: Path | None = None,
        force_legacy: bool = False,
        llvm_symbolizer_binary: Path | None = None,
    ):
        """Starts llvm-symbolizer for `binary`, if one is provided.

        Args:
          binary: File passed to llvm-symbolizer via `--exe`. If None, no
            subprocess is started.
          force_legacy: If True, always use the 'LLVM' text output style
            instead of probing the tool for JSON support.
          llvm_symbolizer_binary: Explicit llvm-symbolizer executable. If
            omitted, 'llvm-symbolizer' is looked up on the system PATH.

        Raises:
          FileNotFoundError: The default llvm-symbolizer is not on the PATH,
            or `binary` does not exist.
        """
        # Lets destructor return cleanly if the binary is not found.
        self._symbolizer = None
        self._lock: threading.Lock = threading.Lock()

        if llvm_symbolizer_binary:
            self._symbolizer_binary = str(llvm_symbolizer_binary)
        else:
            self._symbolizer_binary = 'llvm-symbolizer'
            if shutil.which(self._symbolizer_binary) is None:
                raise FileNotFoundError(
                    'llvm-symbolizer not installed. Run bootstrap, or download '
                    'LLVM (https://github.com/llvm/llvm-project/releases/) and '
                    'add the tools to your system PATH'
                )

        # Prefer JSON output as it's easier to decode.
        if force_legacy:
            self._json_mode = False
        else:
            self._json_mode = LlvmSymbolizer._is_json_compatibile(
                self._symbolizer_binary
            )

        if binary is not None:
            if not binary.exists():
                raise FileNotFoundError(binary)

            output_style = 'JSON' if self._json_mode else 'LLVM'
            cmd = [
                self._symbolizer_binary,
                '--no-inlines',
                '--demangle',
                '--functions',
                f'--output-style={output_style}',
                '--exe',
                str(binary),
            ]
            self._symbolizer = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE
            )

    def __del__(self):
        self.close()

    def close(self):
        """Closes the active llvm-symbolizer process.

        Safe to call more than once; later calls are no-ops.
        """
        process = self._symbolizer
        if process is None:
            return

        # Drop the handle first so a failure below can't leave a
        # half-closed process attached to this object.
        self._symbolizer = None

        process.terminate()
        process.wait()
        try:
            if process.stdin is not None:
                try:
                    process.stdin.close()
                except OSError:
                    # Closing flushes any buffered request; the child is
                    # already gone, so a broken pipe here is harmless.
                    pass
        finally:
            if process.stdout is not None:
                process.stdout.close()

    @staticmethod
    def _is_json_compatibile(symbolizer_binary: str) -> bool:
        """Checks whether llvm-symbolizer advertises JSON output.

        Runs `symbolizer_binary --help` and returns True if any line of its
        stdout mentions both '--output-style' and 'JSON'.
        """
        result = subprocess.run(
            (symbolizer_binary, '--help'),
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
        )
        return any(
            '--output-style' in line and 'JSON' in line
            for line in result.stdout.decode().splitlines()
        )

    @staticmethod
    def _read_json_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's JSON output mode.

        Args:
          address: The address that was requested; stored in the result.
          stdout: Binary stream to read one line of JSON from.

        Returns:
          The first symbol reported, or an unresolved symbol for `address`
          if the response carries no symbol entries.

        Raises:
          ValueError: The stream hit EOF, or the line is not valid JSON
            (json.JSONDecodeError is a ValueError).
        """
        raw_line = stdout.readline()
        if not raw_line:
            # readline() only returns an empty result at EOF.
            raise ValueError('llvm-symbolizer closed unexpectedly')

        results = json.loads(raw_line.decode())

        # The symbol resolution should give us at least one symbol, even
        # if it's largely empty. If it doesn't, report the address as
        # unresolved (as the LLVM-style reader does for unknown symbols)
        # rather than relying on an assert that `python -O` strips.
        symbols = results.get('Symbol') if isinstance(results, dict) else None
        if not symbols:
            return symbolizer.Symbol(address)

        # Get the first symbol.
        symbol = symbols[0]

        return symbolizer.Symbol(
            address=address,
            name=symbol['FunctionName'],
            file=symbol['FileName'],
            line=symbol['Line'],
        )

    @staticmethod
    def _llvm_output_line_splitter(file_and_line: str) -> tuple[str, int]:
        """Splits a 'path/to/src.c:123:1' location into (file, line).

        Returns:
          (file, line). Unknown files (name starting with '?') are reported
          as ('', 0).

        Raises:
          ValueError: The text does not have the expected fields, or the
            line field is empty or not an integer.
        """
        split = file_and_line.split(':')
        # LLVM file name output is as follows:
        #   path/to/src.c:123:1
        # Where the last number is the discriminator, the second to last the
        # line number, and all leading characters the file name. For now,
        # this class ignores discriminators.
        if len(split) < 3:
            # Without this, split[-2] raises IndexError (or picks up the
            # file name as the line number) on truncated output.
            raise ValueError(f'Bad symbol format: {file_and_line}')

        line_number_str = split[-2]
        # File names may themselves contain ':' (e.g. drive letters).
        file = ':'.join(split[:-2])

        if not line_number_str:
            raise ValueError(f'Bad symbol format: {file_and_line}')

        # For unknown file names, mark as blank.
        if file.startswith('?'):
            return ('', 0)

        return (file, int(line_number_str))

    @staticmethod
    def _read_llvm_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's LLVM output mode.

        Args:
          address: The address that was requested; stored in the result.
          stdout: Binary stream to read the response lines from.

        Returns:
          The first symbol of the response, or an unresolved symbol for
          `address` if the reported name starts with '?'.

        Raises:
          ValueError: The stream hit EOF before any output, or the location
            line is malformed.
        """
        symbol_line = stdout.readline()
        if not symbol_line:
            # readline() only returns an empty result at EOF.
            raise ValueError('llvm-symbolizer closed unexpectedly')

        symbol = symbol_line.decode().strip()
        file_and_line = stdout.readline().decode().strip()

        # Might have gotten multiple symbol matches, drop all of the other ones.
        # The results of a symbol are denoted by an empty newline. Stop at
        # EOF too (readline() returns b'' forever once the pipe closes), and
        # accept '\r\n' as well as '\n' for the blank line.
        while stdout.readline().strip():
            pass

        if symbol.startswith('?'):
            return symbolizer.Symbol(address)

        file, line_number = LlvmSymbolizer._llvm_output_line_splitter(
            file_and_line
        )

        return symbolizer.Symbol(address, symbol, file, line_number)

    def symbolize(self, address: int) -> symbolizer.Symbol:
        """Symbolizes an address using the loaded ELF file.

        Args:
          address: The address to look up.

        Returns:
          The symbol reported by llvm-symbolizer, or an empty symbol if no
          llvm-symbolizer process is active.

        Raises:
          ValueError: The llvm-symbolizer process has exited.
        """
        process = self._symbolizer
        if process is None:
            return symbolizer.Symbol(address=address, name='', file='', line=0)

        with self._lock:
            # Popen.returncode is only refreshed by poll()/wait(), so poll
            # explicitly; otherwise a dead child goes unnoticed.
            if process.poll() is not None:
                raise ValueError('llvm-symbolizer closed unexpectedly')

            stdin = process.stdin
            stdout = process.stdout

            assert stdin is not None
            assert stdout is not None

            stdin.write(f'0x{address:08X}\n'.encode())
            stdin.flush()

            if self._json_mode:
                return LlvmSymbolizer._read_json_symbol(address, stdout)

            return LlvmSymbolizer._read_llvm_symbol(address, stdout)