• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2021 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""A symbolizer based on llvm-symbolizer."""
15
16import shutil
17import subprocess
18import threading
19import json
20from typing import Optional, Tuple
21from pathlib import Path
22from pw_symbolizer import symbolizer
23
24
class LlvmSymbolizer(symbolizer.Symbolizer):
    """A symbolizer that wraps llvm-symbolizer.

    When constructed with a binary, a single long-lived ``llvm-symbolizer``
    subprocess is started and addresses are resolved by writing them to its
    stdin and reading the reply from its stdout. Without a binary, every
    lookup returns an empty symbol.
    """

    def __init__(
        self, binary: Optional[Path] = None, force_legacy: bool = False
    ):
        """Starts llvm-symbolizer for ``binary`` if one is provided.

        Args:
          binary: Path to the executable to symbolize against. If None, no
            subprocess is started and symbolize() returns empty symbols.
          force_legacy: If True, always use the LLVM text output style
            rather than probing for JSON output support.

        Raises:
          FileNotFoundError: If llvm-symbolizer is not on the PATH, or if
            ``binary`` does not exist.
        """
        # Lets destructor return cleanly if the binary is not found.
        self._symbolizer: Optional[subprocess.Popen] = None
        # Created unconditionally so the instance is fully formed even when
        # no binary is provided.
        self._lock: threading.Lock = threading.Lock()

        if shutil.which('llvm-symbolizer') is None:
            raise FileNotFoundError(
                'llvm-symbolizer not installed. Run bootstrap, or download '
                'LLVM (https://github.com/llvm/llvm-project/releases/) and add '
                'the tools to your system PATH'
            )

        # Prefer JSON output as it's easier to decode.
        if force_legacy:
            self._json_mode = False
        else:
            self._json_mode = LlvmSymbolizer._is_json_compatibile()

        if binary is None:
            return

        if not binary.exists():
            raise FileNotFoundError(binary)

        output_style = 'JSON' if self._json_mode else 'LLVM'
        cmd = [
            'llvm-symbolizer',
            '--no-inlines',
            '--demangle',
            '--functions',
            f'--output-style={output_style}',
            '--exe',
            str(binary),
        ]
        self._symbolizer = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE
        )

    def __del__(self):
        """Terminates the subprocess (if any) and releases its pipes."""
        # getattr() guards against a partially constructed instance.
        process = getattr(self, '_symbolizer', None)
        if process is None:
            return

        process.terminate()
        process.wait()

        # Close the pipes explicitly instead of leaving them to the garbage
        # collector.
        for pipe in (process.stdin, process.stdout):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                # Nothing useful can be done about a failed close while the
                # object is being destroyed.
                pass

    @staticmethod
    def _is_json_compatibile() -> bool:
        """Checks whether llvm-symbolizer advertises a JSON output style.

        Runs ``llvm-symbolizer --help`` and looks for a line that mentions
        both ``--output-style`` and ``JSON``.
        """
        result = subprocess.run(
            ('llvm-symbolizer', '--help'),
            stdout=subprocess.PIPE,
            stdin=subprocess.DEVNULL,
            check=False,
        )
        return any(
            '--output-style' in line and 'JSON' in line
            for line in result.stdout.decode().splitlines()
        )

    @staticmethod
    def _read_json_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's JSON output mode.

        Args:
          address: The address that was queried; stored in the result.
          stdout: Binary stream connected to llvm-symbolizer's stdout.

        Raises:
          ValueError: If the stream is at EOF, the reply is not valid JSON,
            or the reply carries no symbol entries.
        """
        reply = stdout.readline()
        if not reply:
            # readline() only returns empty bytes at EOF.
            raise ValueError('llvm-symbolizer closed unexpectedly')

        results = json.loads(reply.decode())

        # The symbol resolution should give us at least one symbol, even
        # if it's largely empty.
        frames = results.get('Symbol')
        if not frames:
            raise ValueError(
                f'llvm-symbolizer returned no symbol for 0x{address:08X}'
            )

        # Get the first symbol.
        symbol = frames[0]

        return symbolizer.Symbol(
            address=address,
            name=symbol['FunctionName'],
            file=symbol['FileName'],
            line=symbol['Line'],
        )

    @staticmethod
    def _llvm_output_line_splitter(file_and_line: str) -> Tuple[str, int]:
        """Splits an LLVM-style source location into (file, line number).

        Args:
          file_and_line: Location text such as ``path/to/src.c:123:1``.

        Returns:
          A (file, line) tuple, or ('', 0) when the file name starts with
          '?' (unknown location).

        Raises:
          ValueError: If the text does not contain a usable line number.
        """
        split = file_and_line.split(':')
        # LLVM file name output is as follows:
        #   path/to/src.c:123:1
        # The second to last field is the line number and all leading
        # characters are the file name (which may itself contain ':'). The
        # trailing field is ignored.
        # NOTE(review): the trailing field is presumably the column number;
        # confirm against the llvm-symbolizer output-style documentation.
        if len(split) < 2:
            raise ValueError(f'Bad symbol format: {file_and_line}')

        line_number_str = split[-2]
        file = ':'.join(split[:-2])

        if not line_number_str:
            raise ValueError(f'Bad symbol format: {file_and_line}')

        # For unknown file names, mark as blank.
        if file.startswith('?'):
            return ('', 0)

        try:
            line_number = int(line_number_str)
        except ValueError as err:
            raise ValueError(f'Bad symbol format: {file_and_line}') from err

        return (file, line_number)

    @staticmethod
    def _read_llvm_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's LLVM output mode.

        Args:
          address: The address that was queried; stored in the result.
          stdout: Binary stream connected to llvm-symbolizer's stdout.

        Raises:
          ValueError: If the stream ends before a full reply is read, or the
            location line cannot be parsed.
        """
        symbol_line = stdout.readline()
        location_line = stdout.readline()
        if not symbol_line or not location_line:
            # readline() only returns empty bytes at EOF.
            raise ValueError('llvm-symbolizer closed unexpectedly')

        symbol = symbol_line.decode().strip()
        file_and_line = location_line.decode().strip()

        # Might have gotten multiple symbol matches, drop all of the other
        # ones. The results of a symbol are denoted by an empty newline.
        # Stopping at EOF as well prevents spinning forever if the process
        # exits, and strip() also accepts a '\r\n' blank line.
        while True:
            extra = stdout.readline()
            if not extra or not extra.strip():
                break

        if symbol.startswith('?'):
            return symbolizer.Symbol(address)

        file, line_number = LlvmSymbolizer._llvm_output_line_splitter(
            file_and_line
        )

        return symbolizer.Symbol(address, symbol, file, line_number)

    def symbolize(self, address: int) -> symbolizer.Symbol:
        """Symbolizes an address using the loaded ELF file.

        Args:
          address: The address to look up.

        Returns:
          The resolved symbol, or an empty symbol if no binary was loaded.

        Raises:
          ValueError: If the llvm-symbolizer subprocess has exited.
        """
        if not self._symbolizer:
            return symbolizer.Symbol(address=address, name='', file='', line=0)

        # One request/response exchange at a time so that replies cannot be
        # interleaved between threads.
        with self._lock:
            # returncode is only refreshed by poll()/wait(), so poll() is
            # required to actually notice that the process has exited.
            if self._symbolizer.poll() is not None:
                raise ValueError('llvm-symbolizer closed unexpectedly')

            stdin = self._symbolizer.stdin
            stdout = self._symbolizer.stdout

            assert stdin is not None
            assert stdout is not None

            stdin.write(f'0x{address:08X}\n'.encode())
            stdin.flush()

            if self._json_mode:
                return LlvmSymbolizer._read_json_symbol(address, stdout)

            return LlvmSymbolizer._read_llvm_symbol(address, stdout)
163