• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2022 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
"""
Checks DWARF CFI (unwinding) information by comparing it to disassembly.
It is only a simple heuristic check of stack pointer adjustments.
Fully inferring CFI from disassembly is not possible in general.
"""
22
23import os, re, subprocess, collections, pathlib, bisect, collections
24from typing import List, Optional, Set, Tuple
25
Source = collections.namedtuple("Source", ["addr", "file", "line", "flag"])

def get_source(lib: pathlib.Path) -> List[Source]:
  """ Get source-file and line-number for all hand-written assembly code.

  Runs `llvm-dwarfdump --debug-line` on `lib` and parses its textual output.
  Only line tables which reference at least one ".S" file are used.

  Args:
    lib: Path of the library to inspect.

  Returns:
    Sorted list of Source(addr, file, line, flag) rows.  `addr` is an integer,
    `line` and `flag` are kept as the text printed by llvm-dwarfdump.
    Rows whose flags contain "end_sequence" are excluded.

  Raises:
    subprocess.CalledProcessError: If llvm-dwarfdump exits with an error.
  """

  proc = subprocess.run(["llvm-dwarfdump", "--debug-line", lib],
                        encoding='utf-8',
                        capture_output=True,
                        check=True)

  section_re = re.compile(r"^debug_line\[0x[0-9a-f]+\]$", re.MULTILINE)
  # Capture the whole index.  A repeated one-digit group "(\d)+" would keep only
  # the last digit, so file 12 would overwrite file 2 in the table built below.
  filename_re = re.compile(r'file_names\[ *(\d+)\]:\n\s*name: "(.*)"', re.MULTILINE)
  line_re = re.compile(r'0x([0-9a-f]{16}) +(\d+) +\d+ +(\d+)'  # addr, line, column, file
                       r' +\d+ +\d+ +(.*)')                    # isa, discriminator, flag

  results = []
  for section in section_re.split(proc.stdout):
    # Map the file index (as text) to the file name for this line table.
    files = {m[1]: m[2] for m in filename_re.finditer(section)}
    if not any(name.endswith(".S") for name in files.values()):
      continue  # This line table has no assembly source file.
    for addr, line, file_index, flag in line_re.findall(section):
      results.append(Source(int(addr, 16), files[file_index], line, flag))
  return sorted(row for row in results if "end_sequence" not in row.flag)
49
Fde = collections.namedtuple("Fde", ["addr", "end", "data"])

def get_fde(lib: pathlib.Path) -> List[Fde]:
  """ Get all unwinding FDE blocks (in dumped text-based format).

  Runs `llvm-dwarfdump --debug-frame` on `lib` and splits the output into
  blocks at every new-line which is not followed by an indent or a blank line.

  Args:
    lib: Path of the library to inspect.

  Returns:
    Sorted list of Fde(addr, end, data).  `addr` and `end` are the two bounds
    of the "pc=" range in the block header and `data` is the whole block text.
    Blocks without an FDE header and FDEs starting at address 0 are dropped.

  Raises:
    subprocess.CalledProcessError: If llvm-dwarfdump exits with an error.
  """

  proc = subprocess.run(["llvm-dwarfdump", "--debug-frame", lib],
                        encoding='utf-8',
                        capture_output=True,
                        check=True)

  section_re = re.compile("\n(?! |\n)", re.MULTILINE)  # New-line not followed by indent.
  # The range separator is matched as three literal dots.  Unescaped dots would
  # accept any three characters and could split a single hex number in two.
  fde_re = re.compile(r".* FDE .* pc=([0-9a-f]+)\.\.\.([0-9a-f]+)")

  results = []
  for section in section_re.split(proc.stdout):
    m = fde_re.match(section)
    if not m:
      continue  # Not an FDE block (e.g. a CIE or a section header).
    fde = Fde(int(m[1], 16), int(m[2], 16), section)
    if fde.addr != 0:
      results.append(fde)
  return sorted(results)
71
Asm = collections.namedtuple("Asm", ["addr", "name", "data"])

def get_asm(lib: pathlib.Path) -> List[Asm]:
  """ Disassemble `lib` and return one text chunk per symbol.

  The output of `llvm-objdump --disassemble` is cut at every new-line which is
  not followed by an indent or a blank line.  Chunks which start with an
  "address <symbol>:" header are returned as Asm(addr, name, data), sorted;
  `data` is the whole chunk text, header included.  Other chunks are dropped.
  """

  dump = subprocess.run(["llvm-objdump", "--disassemble", lib],
                        encoding='utf-8',
                        capture_output=True,
                        check=True).stdout

  chunk_re = re.compile("\n(?! |\n)", re.MULTILINE)  # New-line not followed by indent.
  header_re = re.compile("([0-9a-f]+) <(.+)>:")

  # Pair each chunk with the result of matching the symbol header at its start.
  matched = ((header_re.match(chunk), chunk) for chunk in chunk_re.split(dump))
  return sorted(Asm(int(header[1], 16), header[2], chunk)
                for header, chunk in matched if header)
91
Cfa = collections.namedtuple("Cfa", ["addr", "cfa"])

def get_cfa(fde: Fde) -> List[Cfa]:
  """ Extract individual CFA (SP+offset) entries from the FDE block.

  Args:
    fde: FDE whose `data` holds the dumped text of the block.

  Returns:
    One Cfa(addr, cfa) per "0x<addr>: CFA=<expr>" row, in the order of the text.
    `addr` is an integer and `cfa` is the expression text up to the first
    white-space or colon (for example "WSP+16").
  """

  # Raw string is required: "\s" is an invalid escape sequence in a plain literal.
  cfa_re = re.compile(r"0x([0-9a-f]+): CFA=([^\s:]+)")
  return [Cfa(int(addr, 16), cfa) for addr, cfa in cfa_re.findall(fde.data)]
99
Inst = collections.namedtuple("Inst", ["addr", "inst", "symbol"])

def get_instructions(asm: Asm) -> List[Inst]:
  """ Split a disassembled code block into its individual instructions.

  Each "<hex address>: <text>" row of `asm.data` becomes Inst(addr, inst, symbol),
  where `symbol` is `asm.name`.  Leading two-digit hex groups separated by
  blanks (the raw instruction bytes) are not part of the instruction text.
  """

  # Squash runs of spaces/tabs so that the pattern only has to deal with blanks.
  text = re.sub(r"[ \t]+", " ", asm.data)
  row_re = re.compile(r"([0-9a-f]+): +(?:[0-9a-f]{2} +)*(.*)")

  instructions = []
  for row in row_re.finditer(text):
    instructions.append(Inst(int(row[1], 16), row[2], asm.name))
  return instructions
108
CfaOffset = collections.namedtuple("CfaOffset", ["addr", "offset"])

def get_dwarf_cfa_offsets(cfas: List[Cfa]) -> List[CfaOffset]:
  """ Turn textual CFA entries into integer offsets from the stack pointer.

  "SP" and "WSP" give offset 0, "SP+<n>" and "WSP+<n>" give <n>.
  Any other CFA expression gives offset None.
  """

  def to_offset(cfa):
    # Bare stack pointer register.
    if cfa in ("WSP", "SP"):
      return 0
    # Stack pointer register plus a constant.
    if cfa.startswith(("WSP+", "SP+")):
      return int(cfa.split("+")[1])
    # CFA is not expressed relative to the stack pointer.
    return None

  return [CfaOffset(addr, to_offset(cfa)) for addr, cfa in cfas]
123
def get_infered_cfa_offsets(insts: List[Inst]) -> List[CfaOffset]:
  """ Heuristic to convert disassembly into stack offsets.

  Walks the instructions in order and keeps a running offset, which starts at 0
  and is adjusted by every instruction that matches one of the stack pointer
  patterns below (increased by sub/push/pre-decrement stores, decreased by
  add/pop/post-increment loads).  The offset at a branch is remembered for the
  branch target and restored when the walk reaches that address.

  Args:
    insts: Instructions to analyse.  May be empty.

  Returns:
    List of CfaOffset(addr, offset): one entry for the first instruction and
    one for every instruction at which the offset differs from the previous
    entry.  Empty if `insts` is empty.
  """

  if not insts:
    return []  # Nothing to analyse (and there is no first address to report).

  # Regular expressions which find instructions that adjust stack pointer.
  rexprs = []
  def add(rexpr, adjust_offset):
    rexprs.append((re.compile(rexpr), adjust_offset))
  add(r"sub sp,(?: sp,)? #(\d+)", lambda m: int(m[1]))
  add(r"add sp,(?: sp,)? #(\d+)", lambda m: -int(m[1]))
  add(r"str \w+, \[sp, #-(\d+)\]!", lambda m: int(m[1]))
  add(r"ldr \w+, \[sp\], #(\d+)", lambda m: -int(m[1]))
  add(r"stp \w+, \w+, \[sp, #-(\d+)\]!", lambda m: int(m[1]))
  add(r"ldp \w+, \w+, \[sp\], #(\d+)", lambda m: -int(m[1]))
  # Register lists: 8 bytes per entry of a "d" register list, 4 bytes otherwise.
  add(r"vpush \{([d0-9, ]*)\}", lambda m: 8 * len(m[1].split(",")))
  add(r"vpop \{([d0-9, ]*)\}", lambda m: -8 * len(m[1].split(",")))
  add(r"v?push(?:\.w)? \{([\w+, ]*)\}", lambda m: 4 * len(m[1].split(",")))
  add(r"v?pop(?:\.w)? \{([\w+, ]*)\}", lambda m: -4 * len(m[1].split(",")))

  # Regular expression which identifies branches.
  jmp_re = re.compile(r"cb\w* \w+, 0x(\w+)|(?:b|bl|b\w\w) 0x(\w+)")

  offset, future_offset = 0, {}
  result = [CfaOffset(insts[0].addr, offset)]
  for addr, inst, _symbol in insts:
    # Previous code branched here, so use that offset instead.
    # This likely identifies slow-path which is after return.
    if addr in future_offset:
      offset = future_offset[addr]

    # Add entry to output (only if the offset changed).
    if result[-1].offset != offset:
      result.append(CfaOffset(addr, offset))

    # Adjust offset if the instruction modifies stack pointer.
    for rexpr, adjust_offset in rexprs:
      m = rexpr.match(inst)
      if m:
        offset += adjust_offset(m)
        break  # First matched pattern wins.

    # Record branches.  We only support forward edges for now.
    m = jmp_re.match(inst)
    if m:
      # Only one of the two alternatives captured the target address.
      future_offset[int(m[m.lastindex], 16)] = offset
  return result
169
def check_fde(fde: Fde, insts: List[Inst], srcs,
              verbose: bool = False) -> Tuple[Optional[str], Set[int]]:
  """ Compare DWARF offsets to assembly-inferred offsets. Report differences.

  Args:
    fde: FDE block to check.
    insts: Instructions covered by the FDE (expected in ascending address order,
      since both offset tables are scanned forward only).
    srcs: Mapping from instruction address to its source location text.
    verbose: If set, print one line per instruction with both offsets.

  Returns:
    Tuple (error, seen_addrs).  `error` describes the first mismatching
    instruction that has a source location, or is None if there is none.
    `seen_addrs` holds the addresses of all instructions that were compared.
    If there are no instructions or the FDE has no CFA rows, nothing is
    compared and (None, empty set) is returned, so the caller does not treat
    those addresses as covered.
  """

  dwarf_cfa = get_dwarf_cfa_offsets(get_cfa(fde))
  if not insts or not dwarf_cfa:
    return None, set()  # Nothing to compare (indexing below would fail).
  infered_cfa = get_infered_cfa_offsets(insts)

  error, seen_addrs = None, set()
  i = j = 0
  for inst in insts:
    seen_addrs.add(inst.addr)
    # Advance both cursors to the last row at or before this instruction.
    while i+1 < len(dwarf_cfa) and dwarf_cfa[i+1].addr <= inst.addr:
      i += 1
    while j+1 < len(infered_cfa) and infered_cfa[j+1].addr <= inst.addr:
      j += 1
    dwarf_offset, infered_offset = dwarf_cfa[i].offset, infered_cfa[j].offset
    if verbose:
      print("{:08x}: dwarf={:4} infered={:4} {:40} // {}".format(
                inst.addr, str(dwarf_offset), str(infered_offset),
                inst.inst.strip(), srcs.get(inst.addr, "")))
    # A DWARF offset of None means the CFA is not SP-based; it is not compared.
    if dwarf_offset is not None and dwarf_offset != infered_offset:
      if inst.addr in srcs:  # Only report if it maps to source code (not padding or literals).
        error = error or "{:08x} {}".format(inst.addr, srcs.get(inst.addr, ""))
  return error, seen_addrs
191
def check_lib(lib: pathlib.Path):
  """ Check the CFI of all hand-written assembly in `lib` and print the problems found.

  Every FDE which starts at an address with assembly source information is
  compared to the disassembly it covers.  For an FDE with a mismatch, an
  "ERROR at" line is printed, followed by a verbose dump of that FDE.
  Finally, a "Missing CFI" line is printed for every assembly source address
  that was not covered by any of the compared FDEs.

  Args:
    lib: Path of the library to check.

  Raises:
    FileNotFoundError: If `lib` does not exist.
  """
  if not lib.exists():
    # Raise explicitly instead of using assert, which is skipped under "python -O".
    raise FileNotFoundError("Library not found: {}".format(lib))
  # Prefixes of symbol names which are excluded from the comparison.
  IGNORE = (
      "art_quick_throw_null_pointer_exception_from_signal",  # Starts with non-zero offset.
      "art_quick_generic_jni_trampoline",  # Saves/restores SP in other register.
      "nterp_op_",  # Uses calculated CFA due to dynamic stack size.
      "$d.",  # Data (literals) interleaved within code.
  )
  fdes = get_fde(lib)
  asms = collections.deque(get_asm(lib))
  srcs = {src.addr: src.file + ":" + src.line for src in get_source(lib)}
  seen = set()  # Used to verify that we have covered all assembly source lines.

  for fde in fdes:
    if fde.addr not in srcs:
      continue  # Ignore if it is not hand-written assembly.

    # Assembly instructions (one FDE can cover several assembly chunks).
    all_insts, name = [], None
    while asms and asms[0].addr < fde.end:
      asm = asms.popleft()
      if asm.addr < fde.addr:
        continue  # Chunk starts before this FDE; drop it.
      insts = get_instructions(asm)
      if asm.name.startswith(IGNORE):
        # Ignored code still counts as covered.
        seen.update(inst.addr for inst in insts)
        continue
      all_insts.extend(insts)
      name = name or asm.name  # Report under the name of the first chunk.
    if not all_insts:
      continue  # No assembly

    # Compare DWARF data to assembly instructions
    error, seen_addrs = check_fde(fde, all_insts, srcs)
    if error:
      print("ERROR at " + name + " " + error)
      check_fde(fde, all_insts, srcs, True)  # Run again to print the details.
      print("")
    seen.update(seen_addrs)
  for addr in sorted(srcs.keys() - seen):
    print("Missing CFI for {:08x}: {}".format(addr, srcs[addr]))
233
234
def main(argv):
  """ Check libraries provided on the command line, or use the default build output.

  Args:
    argv: Command line; the first item is skipped and every other item is
      the path of a library to check.  If no library is given, the 32-bit and
      64-bit libart.so below the directory named by the OUT environment
      variable are checked.

  Raises:
    SystemExit: If no library is given and OUT is not set.
  """

  libs = list(argv[1:])  # Copy, so that any sequence type is accepted.
  if not libs:
    out = os.environ.get("OUT")
    if not out:
      # Fail with a clear message instead of a bare KeyError.
      raise SystemExit("No library given and the OUT environment variable is not set.")
    libs.append(out + "/symbols/apex/com.android.art/lib/libart.so")
    libs.append(out + "/symbols/apex/com.android.art/lib64/libart.so")
  for lib in libs:
    check_lib(pathlib.Path(lib))
245
if __name__ == "__main__":
  # Use the sys module directly; "os.sys" only works because os happens to import sys.
  import sys
  main(sys.argv)
248