• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# Copyright 2021 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
14"""A symbolizer based on llvm-symbolizer."""
15
16import shutil
17import subprocess
18import threading
19import json
20from typing import Optional, Tuple
21from pathlib import Path
22from pw_symbolizer import symbolizer
23
24
class LlvmSymbolizer(symbolizer.Symbolizer):
    """A symbolizer that wraps llvm-symbolizer.

    When constructed with a binary, a single long-lived llvm-symbolizer
    subprocess is started and addresses are fed to it over stdin, one per
    call to symbolize(). Access to the subprocess pipes is serialized with a
    lock. When constructed without a binary, symbolize() returns empty
    symbols.
    """
    def __init__(self,
                 binary: Optional[Path] = None,
                 force_legacy: bool = False):
        """Starts llvm-symbolizer for the given binary, if one is provided.

        Args:
          binary: Path to the ELF file to symbolize addresses against. If
            None, no subprocess is started.
          force_legacy: If True, always use llvm-symbolizer's plain-text
            (LLVM) output style even when JSON output is available.

        Raises:
          FileNotFoundError: llvm-symbolizer is not on the PATH, or `binary`
            does not exist.
        """
        # Lets destructor return cleanly if the binary is not found.
        self._symbolizer = None
        # Created unconditionally so the attribute exists regardless of
        # whether a subprocess was started.
        self._lock: threading.Lock = threading.Lock()

        if shutil.which('llvm-symbolizer') is None:
            raise FileNotFoundError(
                'llvm-symbolizer not installed. Run bootstrap, or download '
                'LLVM (https://github.com/llvm/llvm-project/releases/) and add '
                'the tools to your system PATH')

        # Prefer JSON output as it's easier to decode.
        if force_legacy:
            self._json_mode = False
        else:
            self._json_mode = LlvmSymbolizer._is_json_compatibile()

        if binary is not None:
            if not binary.exists():
                raise FileNotFoundError(binary)

            output_style = 'JSON' if self._json_mode else 'LLVM'
            cmd = [
                'llvm-symbolizer',
                '--no-inlines',
                '--demangle',
                '--functions',
                f'--output-style={output_style}',
                '--exe',
                str(binary),
            ]
            self._symbolizer = subprocess.Popen(cmd,
                                                stdout=subprocess.PIPE,
                                                stdin=subprocess.PIPE)

    def __del__(self):
        """Stops the llvm-symbolizer subprocess, if one was started."""
        # getattr() keeps this safe even if __init__ never ran to the point
        # of setting the attribute.
        process = getattr(self, '_symbolizer', None)
        if process is None:
            return

        # Best-effort cleanup: a destructor must not raise, so failures while
        # tearing down an already-dead process are deliberately ignored.
        try:
            process.terminate()
            # Close our ends of the pipes and reap the child so neither file
            # descriptors nor a zombie process are left behind.
            for pipe in (process.stdin, process.stdout):
                if pipe is not None:
                    pipe.close()
            process.wait(timeout=1)
        except (OSError, subprocess.TimeoutExpired):
            pass

    @staticmethod
    def _is_json_compatibile() -> bool:
        """Checks whether the installed llvm-symbolizer supports JSON output.

        Returns:
          True if a line of `llvm-symbolizer --help` mentions both
          `--output-style` and `JSON`.
        """
        result = subprocess.run(('llvm-symbolizer', '--help'),
                                stdout=subprocess.PIPE,
                                stdin=subprocess.PIPE)
        return any('--output-style' in line and 'JSON' in line
                   for line in result.stdout.decode().splitlines())

    @staticmethod
    def _read_json_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's JSON output mode.

        Args:
          address: The address that was requested; stored in the result.
          stdout: Binary stream connected to llvm-symbolizer's stdout.

        Returns:
          A Symbol built from the first entry of the reply's "Symbol" list.

        Raises:
          ValueError: The stream hit EOF, the line was not valid JSON, or the
            reply did not contain any symbol entries.
        """
        raw_line = stdout.readline()
        # An empty read means EOF, i.e. the subprocess went away.
        if not raw_line:
            raise ValueError('llvm-symbolizer closed unexpectedly')

        results = json.loads(raw_line.decode())

        # The symbol resolution should give us at least one symbol, even
        # if it's largely empty. This is checked explicitly rather than with
        # an assert so the check survives `python -O`.
        symbols = results.get('Symbol')
        if not symbols:
            raise ValueError('llvm-symbolizer returned no symbol for '
                             f'0x{address:08X}: {results.get("Error")}')

        # Get the first symbol.
        symbol = symbols[0]

        return symbolizer.Symbol(address=address,
                                 name=symbol['FunctionName'],
                                 file=symbol['FileName'],
                                 line=symbol['Line'])

    @staticmethod
    def _llvm_output_line_splitter(file_and_line: str) -> Tuple[str, int]:
        """Splits an LLVM-style source location into file name and line.

        Args:
          file_and_line: A location string such as `path/to/src.c:123:1`.

        Returns:
          A (file, line) tuple, or ('', 0) when the file name is unknown
          (starts with '?').

        Raises:
          ValueError: The string does not have enough ':'-separated fields,
            or the line number field is empty or not an integer.
        """
        split = file_and_line.split(':')
        # LLVM file name output is as follows:
        #   path/to/src.c:123:1
        # Where the last number is the column, the second to last the
        # line number, and all leading characters the file name. For now,
        # this class ignores the trailing field.
        if len(split) < 2:
            # Without this guard the index below would raise IndexError
            # instead of the documented ValueError.
            raise ValueError(f'Bad symbol format: {file_and_line}')

        line_number_str = split[-2]
        # Re-join so file names that themselves contain ':' stay intact.
        file = ':'.join(split[:-2])

        if not line_number_str:
            raise ValueError(f'Bad symbol format: {file_and_line}')

        # For unknown file names, mark as blank.
        if file.startswith('?'):
            return ('', 0)

        return (file, int(line_number_str))

    @staticmethod
    def _read_llvm_symbol(address, stdout) -> symbolizer.Symbol:
        """Reads a single symbol from llvm-symbolizer's LLVM output mode.

        Args:
          address: The address that was requested; stored in the result.
          stdout: Binary stream connected to llvm-symbolizer's stdout.

        Returns:
          A Symbol for the first match, or a Symbol holding only the address
          if the function name is unknown (starts with '?').

        Raises:
          ValueError: The stream hit EOF before the reply was terminated, or
            the source location line could not be parsed.
        """
        symbol = stdout.readline().decode().strip()
        file_and_line = stdout.readline().decode().strip()

        # Might have gotten multiple symbol matches, drop all of the other ones.
        # The results of a symbol are denoted by an empty newline.
        while True:
            line = stdout.readline()
            # readline() returns b'' at EOF; without this check a dead
            # subprocess would make this loop spin forever.
            if not line:
                raise ValueError('llvm-symbolizer closed unexpectedly')
            # strip() so that a '\r\n' terminator is recognized as well.
            if not line.strip():
                break

        if symbol.startswith('?'):
            return symbolizer.Symbol(address)

        file, line_number = LlvmSymbolizer._llvm_output_line_splitter(
            file_and_line)

        return symbolizer.Symbol(address, symbol, file, line_number)

    def symbolize(self, address: int) -> symbolizer.Symbol:
        """Symbolizes an address using the loaded ELF file.

        Args:
          address: The address to look up.

        Returns:
          The resolved Symbol, or an empty Symbol carrying only the address
          when no binary was loaded.

        Raises:
          ValueError: The llvm-symbolizer subprocess has exited or produced
            output that could not be parsed.
        """
        if not self._symbolizer:
            return symbolizer.Symbol(address=address, name='', file='', line=0)

        with self._lock:
            # poll() refreshes the exit status; the returncode attribute
            # alone stays None until poll() or wait() has been called.
            if self._symbolizer.poll() is not None:
                raise ValueError('llvm-symbolizer closed unexpectedly')

            stdin = self._symbolizer.stdin
            stdout = self._symbolizer.stdout

            assert stdin is not None
            assert stdout is not None

            stdin.write(f'0x{address:08X}\n'.encode())
            # Flush so the request reaches the subprocess before we block
            # reading its reply.
            stdin.flush()

            if self._json_mode:
                return LlvmSymbolizer._read_json_symbol(address, stdout)

            return LlvmSymbolizer._read_llvm_symbol(address, stdout)
155