• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2021 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""A symbolizer based on llvm-symbolizer."""
15
16import shutil
17import subprocess
18import threading
19import json
20from pathlib import Path
21from pw_symbolizer import symbolizer
22
23
class LlvmSymbolizer(symbolizer.Symbolizer):
    """A symbolizer that wraps llvm-symbolizer.

    When constructed with a ``binary``, a long-running llvm-symbolizer
    subprocess is started and addresses are resolved by writing them to its
    stdin and reading the reply from its stdout. Without a ``binary`` no
    subprocess is started and ``symbolize()`` returns blank symbols.
    """

    def __init__(
        self,
        binary: Path | None = None,
        force_legacy: bool = False,
        llvm_symbolizer_binary: Path | None = None,
    ):
        """Locates llvm-symbolizer and, if ``binary`` is given, launches it.

        Args:
          binary: File passed to llvm-symbolizer via ``--exe``. If None, no
            subprocess is started.
          force_legacy: If True, always use the 'LLVM' text output style
            instead of probing for JSON support.
          llvm_symbolizer_binary: Explicit path to llvm-symbolizer. If not
            provided, 'llvm-symbolizer' is looked up on the system PATH.

        Raises:
          FileNotFoundError: llvm-symbolizer is not on PATH (when no explicit
            path was given), or ``binary`` does not exist.
        """
        # Lets destructor return cleanly if the binary is not found.
        self._symbolizer = None
        # Created unconditionally so the attribute exists on every instance,
        # including those constructed without a binary.
        self._lock: threading.Lock = threading.Lock()

        if llvm_symbolizer_binary:
            self._symbolizer_binary = str(llvm_symbolizer_binary)
        else:
            self._symbolizer_binary = 'llvm-symbolizer'
            if shutil.which(self._symbolizer_binary) is None:
                raise FileNotFoundError(
                    'llvm-symbolizer not installed. Run bootstrap, or download '
                    'LLVM (https://github.com/llvm/llvm-project/releases/) and '
                    'add the tools to your system PATH'
                )

        # Prefer JSON output as it's easier to decode.
        if force_legacy:
            self._json_mode = False
        else:
            self._json_mode = LlvmSymbolizer._is_json_compatibile(
                self._symbolizer_binary
            )

        if binary is not None:
            if not binary.exists():
                raise FileNotFoundError(binary)

            output_style = 'JSON' if self._json_mode else 'LLVM'
            cmd = [
                self._symbolizer_binary,
                '--no-inlines',
                '--demangle',
                '--functions',
                f'--output-style={output_style}',
                '--exe',
                str(binary),
            ]
            self._symbolizer = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE
            )

    def __del__(self):
        self.close()

    def close(self):
        """Closes the active llvm-symbolizer process.

        Safe to call more than once; later calls are no-ops.
        """
        process = self._symbolizer
        if process is None:
            return

        # Drop the reference first so the instance is marked closed even if
        # one of the cleanup steps below raises.
        self._symbolizer = None

        process.terminate()
        process.wait()
        for pipe in (process.stdin, process.stdout):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                # Closing stdin flushes it; if an earlier write to a dead
                # process left data buffered, that flush fails with a broken
                # pipe. The process is already gone, so cleanup is
                # best-effort (this also runs from __del__).
                pass

    @staticmethod
    def _is_json_compatibile(symbolizer_binary: str) -> bool:
        """Checks llvm-symbolizer to ensure compatibility.

        Runs ``<symbolizer_binary> --help`` and returns True if any line of
        its stdout mentions both '--output-style' and 'JSON'.
        """
        result = subprocess.run(
            (symbolizer_binary, '--help'),
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE,
            check=False,
        )
        # Undecodable bytes in the help text must not break the probe.
        help_text = result.stdout.decode(errors='replace')
        return any(
            '--output-style' in line and 'JSON' in line
            for line in help_text.splitlines()
        )

    @staticmethod
    def _read_json_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's JSON output mode.

        Args:
          address: The address that was requested; stored in the result.
          stdout: Binary stream to read one line of JSON from.

        Raises:
          ValueError: The stream hit end-of-file, or the reply holds no
            symbol entries.
        """
        raw_line = stdout.readline()
        if not raw_line:
            # readline() only returns empty bytes at end-of-file.
            raise ValueError('llvm-symbolizer closed unexpectedly')

        results = json.loads(raw_line.decode())

        # The symbol resolution should give us at least one symbol, even
        # if it's largely empty. Checked explicitly rather than with assert
        # so the check survives `python -O`.
        symbols = results.get('Symbol')
        if not symbols:
            # NOTE(review): failed lookups presumably carry an "Error" entry
            # instead of "Symbol" -- confirm against llvm-symbolizer docs.
            detail = results.get('Error', results)
            raise ValueError(
                f'llvm-symbolizer returned no symbols for '
                f'0x{address:08X}: {detail}'
            )

        # Get the first symbol.
        symbol = symbols[0]

        return symbolizer.Symbol(
            address=address,
            name=symbol['FunctionName'],
            file=symbol['FileName'],
            line=symbol['Line'],
        )

    @staticmethod
    def _llvm_output_line_splitter(file_and_line: str) -> tuple[str, int]:
        """Splits a 'path/to/src.c:123:1' location into (file, line).

        Returns:
          A (file name, line number) tuple, or ('', 0) when the file name
          starts with '?' (unknown location).

        Raises:
          ValueError: The text does not have at least two ':'-separated
            fields, or the line number is missing or not an integer.
        """
        # LLVM file name output is as follows:
        #   path/to/src.c:123:1
        # Where the last number is the discriminator, the second to last the
        # line number, and all leading characters the file name. For now,
        # this class ignores discriminators.
        # NOTE(review): llvm-symbolizer's docs appear to describe the last
        # field as the column rather than a discriminator -- confirm.
        split = file_and_line.split(':')
        if len(split) < 2:
            # Previously surfaced as an IndexError from split[-2].
            raise ValueError(f'Bad symbol format: {file_and_line}')

        line_number_str = split[-2]
        # Re-join so file names that themselves contain ':' stay intact.
        file = ':'.join(split[:-2])

        if not line_number_str:
            raise ValueError(f'Bad symbol format: {file_and_line}')

        # For unknown file names, mark as blank.
        if file.startswith('?'):
            return ('', 0)

        try:
            line_number = int(line_number_str)
        except ValueError:
            raise ValueError(
                f'Bad symbol format: {file_and_line}'
            ) from None

        return (file, line_number)

    @staticmethod
    def _read_llvm_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's LLVM output mode.

        Consumes lines from ``stdout`` up to and including the blank line
        that terminates the reply.

        Raises:
          ValueError: The stream hit end-of-file before the reply was
            complete, or the location line is malformed.
        """
        closed_message = 'llvm-symbolizer closed unexpectedly'

        symbol_line = stdout.readline()
        location_line = stdout.readline()
        if not symbol_line or not location_line:
            raise ValueError(closed_message)

        symbol = symbol_line.decode().strip()
        file_and_line = location_line.decode().strip()

        # Might have gotten multiple symbol matches, drop all of the other ones.
        # The results of a symbol are denoted by an empty newline.
        while True:
            extra = stdout.readline()
            if not extra:
                # End-of-file: readline() would return b'' forever, so bail
                # out instead of spinning.
                raise ValueError(closed_message)
            if not extra.strip():
                # Blank separator line (accepts both '\n' and '\r\n').
                break

        if symbol.startswith('?'):
            return symbolizer.Symbol(address)

        file, line_number = LlvmSymbolizer._llvm_output_line_splitter(
            file_and_line
        )

        return symbolizer.Symbol(address, symbol, file, line_number)

    def symbolize(self, address: int) -> symbolizer.Symbol:
        """Symbolizes an address using the loaded ELF file.

        Returns:
          The resolved symbol, or a blank symbol (empty name and file, line
          0) when no llvm-symbolizer process is active.

        Raises:
          ValueError: The llvm-symbolizer process has exited, or its reply
            could not be parsed.
        """
        # Snapshot the handle so a concurrent close() cannot swap it for None
        # between the check and the uses below.
        process = self._symbolizer
        if process is None:
            return symbolizer.Symbol(address=address, name='', file='', line=0)

        with self._lock:
            # returncode is only refreshed by poll()/wait(); reading the
            # attribute alone would never notice a process that died.
            if process.poll() is not None:
                raise ValueError('llvm-symbolizer closed unexpectedly')

            stdin = process.stdin
            stdout = process.stdout

            # Both pipes are requested in __init__; these narrow the
            # Optional types for static checkers.
            assert stdin is not None
            assert stdout is not None

            stdin.write(f'0x{address:08X}\n'.encode())
            stdin.flush()

            if self._json_mode:
                return LlvmSymbolizer._read_json_symbol(address, stdout)

            return LlvmSymbolizer._read_llvm_symbol(address, stdout)
180