# Copyright 2021 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""A symbolizer based on llvm-symbolizer."""

import shutil
import subprocess
import threading
import json
from typing import Optional, Tuple
from pathlib import Path
from pw_symbolizer import symbolizer


class LlvmSymbolizer(symbolizer.Symbolizer):
    """A symbolizer that wraps llvm-symbolizer.

    When constructed with a binary, a single long-lived llvm-symbolizer
    subprocess is started and addresses are written to its stdin one at a
    time; replies are read back from its stdout. Without a binary, every
    lookup returns a blank symbol.
    """

    def __init__(
        self, binary: Optional[Path] = None, force_legacy: bool = False
    ):
        """Starts llvm-symbolizer for the given binary, if one is provided.

        Args:
          binary: Path of the executable to symbolize against. If None, no
            subprocess is started.
          force_legacy: If True, always use the plain-text LLVM output style
            instead of probing for JSON support.

        Raises:
          FileNotFoundError: llvm-symbolizer is not on the PATH, or `binary`
            does not exist.
        """
        # Lets destructor return cleanly if the binary is not found.
        self._symbolizer: Optional[subprocess.Popen] = None
        # Serializes the write-request/read-reply exchange on the subprocess
        # pipes. Created up front so it exists on every constructed object.
        self._lock: threading.Lock = threading.Lock()

        if shutil.which('llvm-symbolizer') is None:
            raise FileNotFoundError(
                'llvm-symbolizer not installed. Run bootstrap, or download '
                'LLVM (https://github.com/llvm/llvm-project/releases/) and add '
                'the tools to your system PATH'
            )

        # Prefer JSON output as it's easier to decode.
        if force_legacy:
            self._json_mode = False
        else:
            self._json_mode = LlvmSymbolizer._is_json_compatibile()

        if binary is not None:
            if not binary.exists():
                raise FileNotFoundError(binary)

            output_style = 'JSON' if self._json_mode else 'LLVM'
            cmd = [
                'llvm-symbolizer',
                '--no-inlines',
                '--demangle',
                '--functions',
                f'--output-style={output_style}',
                '--exe',
                str(binary),
            ]
            self._symbolizer = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE
            )

    def __del__(self):
        """Stops the llvm-symbolizer subprocess and releases its pipes."""
        if self._symbolizer:
            self._symbolizer.terminate()
            self._symbolizer.wait()
            # The pipe objects are not closed by terminate()/wait(); close
            # them so the file descriptors are released promptly.
            for pipe in (self._symbolizer.stdin, self._symbolizer.stdout):
                if pipe is None:
                    continue
                try:
                    pipe.close()
                except OSError:
                    # Best-effort cleanup: the process is already gone, so a
                    # failed flush of leftover input is not actionable here.
                    pass

    @staticmethod
    def _is_json_compatibile() -> bool:
        """Checks whether llvm-symbolizer's help text advertises JSON output.

        Returns:
          True if any line of `llvm-symbolizer --help` mentions both
          `--output-style` and `JSON`; False otherwise.
        """
        result = subprocess.run(
            ('llvm-symbolizer', '--help'),
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
        )
        return any(
            '--output-style' in line and 'JSON' in line
            for line in result.stdout.decode().splitlines()
        )

    @staticmethod
    def _read_json_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's JSON output mode.

        Args:
          address: The address that was requested; stored in the result.
          stdout: Binary stream to read one line of JSON from.

        Raises:
          ValueError: The stream hit end-of-file, the line is not valid JSON,
            or the reply contains no symbol entries.
        """
        reply = stdout.readline()
        # readline() returns an empty bytes object only at end-of-file.
        if not reply:
            raise ValueError('llvm-symbolizer closed unexpectedly')

        results = json.loads(reply.decode())

        # The symbol resolution should give us at least one symbol, even
        # if it's largely empty. Checked explicitly rather than with assert
        # so the validation survives `python -O`.
        symbols = results.get('Symbol')
        if not symbols:
            raise ValueError(
                f'llvm-symbolizer returned no symbols: {results}'
            )

        # Get the first symbol.
        symbol = symbols[0]

        return symbolizer.Symbol(
            address=address,
            name=symbol['FunctionName'],
            file=symbol['FileName'],
            line=symbol['Line'],
        )

    @staticmethod
    def _llvm_output_line_splitter(file_and_line: str) -> Tuple[str, int]:
        """Splits a `file:line:discriminator` string into (file, line).

        Args:
          file_and_line: A location string such as `path/to/src.c:123:1`.

        Returns:
          The file name and line number, or `('', 0)` when the file name
          starts with `?` (unknown location).

        Raises:
          ValueError: The string does not have at least three `:`-separated
            fields, or the line field is empty or not an integer.
        """
        split = file_and_line.split(':')
        # LLVM file name output is as follows:
        #   path/to/src.c:123:1
        # Where the last number is the discriminator, the second to last the
        # line number, and all leading characters the file name. For now,
        # this class ignores discriminators.
        if len(split) < 3:
            raise ValueError(f'Bad symbol format: {file_and_line}')

        line_number_str = split[-2]
        # Re-join so file names that themselves contain ':' stay intact.
        file = ':'.join(split[:-2])

        if not line_number_str:
            raise ValueError(f'Bad symbol format: {file_and_line}')

        # For unknown file names, mark as blank.
        if file.startswith('?'):
            return ('', 0)

        return (file, int(line_number_str))

    @staticmethod
    def _read_llvm_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's LLVM output mode.

        Args:
          address: The address that was requested; stored in the result.
          stdout: Binary stream positioned at the start of a reply.

        Raises:
          ValueError: The stream hit end-of-file before the reply was
            complete, or the location line is malformed.
        """
        symbol_line = stdout.readline()
        # readline() returns an empty bytes object only at end-of-file.
        if not symbol_line:
            raise ValueError('llvm-symbolizer closed unexpectedly')
        symbol = symbol_line.decode().strip()
        file_and_line = stdout.readline().decode().strip()

        # Might have gotten multiple symbol matches, drop all of the other ones.
        # The results of a symbol are denoted by an empty newline.
        while True:
            line = stdout.readline()
            if not line:
                # End-of-file: without this check the loop would spin forever
                # on the empty reads.
                raise ValueError('llvm-symbolizer closed unexpectedly')
            if not line.strip():
                # Blank line (either '\n' or '\r\n') terminates the reply.
                break

        if symbol.startswith('?'):
            return symbolizer.Symbol(address)

        file, line_number = LlvmSymbolizer._llvm_output_line_splitter(
            file_and_line
        )

        return symbolizer.Symbol(address, symbol, file, line_number)

    def symbolize(self, address: int) -> symbolizer.Symbol:
        """Symbolizes an address using the loaded ELF file.

        Args:
          address: The address to look up.

        Returns:
          The decoded symbol, or a blank symbol if no binary was loaded.

        Raises:
          ValueError: The llvm-symbolizer subprocess has exited, or its reply
            could not be parsed.
        """
        if not self._symbolizer:
            return symbolizer.Symbol(address=address, name='', file='', line=0)

        with self._lock:
            # poll() refreshes the exit status; the returncode attribute alone
            # stays None until poll()/wait() has been called.
            if self._symbolizer.poll() is not None:
                raise ValueError('llvm-symbolizer closed unexpectedly')

            stdin = self._symbolizer.stdin
            stdout = self._symbolizer.stdout

            assert stdin is not None
            assert stdout is not None

            stdin.write(f'0x{address:08X}\n'.encode())
            stdin.flush()

            if self._json_mode:
                return LlvmSymbolizer._read_json_symbol(address, stdout)

            return LlvmSymbolizer._read_llvm_symbol(address, stdout)