1 //===- SymbolizableObjectFile.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implementation of SymbolizableObjectFile class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "SymbolizableObjectFile.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/BinaryFormat/COFF.h"
18 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20 #include "llvm/Object/COFF.h"
21 #include "llvm/Object/ObjectFile.h"
22 #include "llvm/Object/SymbolSize.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/DataExtractor.h"
25 #include "llvm/Support/Error.h"
26 #include <algorithm>
27 #include <cstdint>
28 #include <memory>
29 #include <string>
30 #include <system_error>
31 #include <utility>
32 #include <vector>
33
34 using namespace llvm;
35 using namespace object;
36 using namespace symbolize;
37
38 static DILineInfoSpecifier
getDILineInfoSpecifier(FunctionNameKind FNKind)39 getDILineInfoSpecifier(FunctionNameKind FNKind) {
40 return DILineInfoSpecifier(
41 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind);
42 }
43
44 ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
create(const object::ObjectFile * Obj,std::unique_ptr<DIContext> DICtx,bool UntagAddresses)45 SymbolizableObjectFile::create(const object::ObjectFile *Obj,
46 std::unique_ptr<DIContext> DICtx,
47 bool UntagAddresses) {
48 assert(DICtx);
49 std::unique_ptr<SymbolizableObjectFile> res(
50 new SymbolizableObjectFile(Obj, std::move(DICtx), UntagAddresses));
51 std::unique_ptr<DataExtractor> OpdExtractor;
52 uint64_t OpdAddress = 0;
53 // Find the .opd (function descriptor) section if any, for big-endian
54 // PowerPC64 ELF.
55 if (Obj->getArch() == Triple::ppc64) {
56 for (section_iterator Section : Obj->sections()) {
57 Expected<StringRef> NameOrErr = Section->getName();
58 if (!NameOrErr)
59 return errorToErrorCode(NameOrErr.takeError());
60
61 if (*NameOrErr == ".opd") {
62 Expected<StringRef> E = Section->getContents();
63 if (!E)
64 return errorToErrorCode(E.takeError());
65 OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(),
66 Obj->getBytesInAddress()));
67 OpdAddress = Section->getAddress();
68 break;
69 }
70 }
71 }
72 std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
73 computeSymbolSizes(*Obj);
74 for (auto &P : Symbols)
75 res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
76
77 // If this is a COFF object and we didn't find any symbols, try the export
78 // table.
79 if (Symbols.empty()) {
80 if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
81 if (auto EC = res->addCoffExportSymbols(CoffObj))
82 return EC;
83 }
84
85 std::vector<std::pair<SymbolDesc, StringRef>> &Fs = res->Functions,
86 &Os = res->Objects;
87 auto Uniquify = [](std::vector<std::pair<SymbolDesc, StringRef>> &S) {
88 // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
89 // pick the one with the largest Size. This helps us avoid symbols with no
90 // size information (Size=0).
91 llvm::sort(S);
92 auto I = S.begin(), E = S.end(), J = S.begin();
93 while (I != E) {
94 auto OI = I;
95 while (++I != E && OI->first.Addr == I->first.Addr) {
96 }
97 *J++ = I[-1];
98 }
99 S.erase(J, S.end());
100 };
101 Uniquify(Fs);
102 Uniquify(Os);
103
104 return std::move(res);
105 }
106
SymbolizableObjectFile(const ObjectFile * Obj,std::unique_ptr<DIContext> DICtx,bool UntagAddresses)107 SymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile *Obj,
108 std::unique_ptr<DIContext> DICtx,
109 bool UntagAddresses)
110 : Module(Obj), DebugInfoContext(std::move(DICtx)),
111 UntagAddresses(UntagAddresses) {}
112
113 namespace {
114
115 struct OffsetNamePair {
116 uint32_t Offset;
117 StringRef Name;
118
operator <__anonbe0273cd0211::OffsetNamePair119 bool operator<(const OffsetNamePair &R) const {
120 return Offset < R.Offset;
121 }
122 };
123
124 } // end anonymous namespace
125
addCoffExportSymbols(const COFFObjectFile * CoffObj)126 std::error_code SymbolizableObjectFile::addCoffExportSymbols(
127 const COFFObjectFile *CoffObj) {
128 // Get all export names and offsets.
129 std::vector<OffsetNamePair> ExportSyms;
130 for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
131 StringRef Name;
132 uint32_t Offset;
133 if (auto EC = Ref.getSymbolName(Name))
134 return EC;
135 if (auto EC = Ref.getExportRVA(Offset))
136 return EC;
137 ExportSyms.push_back(OffsetNamePair{Offset, Name});
138 }
139 if (ExportSyms.empty())
140 return std::error_code();
141
142 // Sort by ascending offset.
143 array_pod_sort(ExportSyms.begin(), ExportSyms.end());
144
145 // Approximate the symbol sizes by assuming they run to the next symbol.
146 // FIXME: This assumes all exports are functions.
147 uint64_t ImageBase = CoffObj->getImageBase();
148 for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
149 OffsetNamePair &Export = *I;
150 // FIXME: The last export has a one byte size now.
151 uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
152 uint64_t SymbolStart = ImageBase + Export.Offset;
153 uint64_t SymbolSize = NextOffset - Export.Offset;
154 SymbolDesc SD = {SymbolStart, SymbolSize};
155 Functions.emplace_back(SD, Export.Name);
156 }
157 return std::error_code();
158 }
159
addSymbol(const SymbolRef & Symbol,uint64_t SymbolSize,DataExtractor * OpdExtractor,uint64_t OpdAddress)160 std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
161 uint64_t SymbolSize,
162 DataExtractor *OpdExtractor,
163 uint64_t OpdAddress) {
164 // Avoid adding symbols from an unknown/undefined section.
165 const ObjectFile *Obj = Symbol.getObject();
166 Expected<section_iterator> Sec = Symbol.getSection();
167 if (!Sec || (Obj && Obj->section_end() == *Sec))
168 return std::error_code();
169 Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
170 if (!SymbolTypeOrErr)
171 return errorToErrorCode(SymbolTypeOrErr.takeError());
172 SymbolRef::Type SymbolType = *SymbolTypeOrErr;
173 if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
174 return std::error_code();
175 Expected<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
176 if (!SymbolAddressOrErr)
177 return errorToErrorCode(SymbolAddressOrErr.takeError());
178 uint64_t SymbolAddress = *SymbolAddressOrErr;
179 if (UntagAddresses) {
180 // For kernel addresses, bits 56-63 need to be set, so we sign extend bit 55
181 // into bits 56-63 instead of masking them out.
182 SymbolAddress &= (1ull << 56) - 1;
183 SymbolAddress = (int64_t(SymbolAddress) << 8) >> 8;
184 }
185 if (OpdExtractor) {
186 // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
187 // function descriptors. The first word of the descriptor is a pointer to
188 // the function's code.
189 // For the purposes of symbolization, pretend the symbol's address is that
190 // of the function's code, not the descriptor.
191 uint64_t OpdOffset = SymbolAddress - OpdAddress;
192 if (OpdExtractor->isValidOffsetForAddress(OpdOffset))
193 SymbolAddress = OpdExtractor->getAddress(&OpdOffset);
194 }
195 Expected<StringRef> SymbolNameOrErr = Symbol.getName();
196 if (!SymbolNameOrErr)
197 return errorToErrorCode(SymbolNameOrErr.takeError());
198 StringRef SymbolName = *SymbolNameOrErr;
199 // Mach-O symbol table names have leading underscore, skip it.
200 if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_')
201 SymbolName = SymbolName.drop_front();
202 // FIXME: If a function has alias, there are two entries in symbol table
203 // with same address size. Make sure we choose the correct one.
204 auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
205 SymbolDesc SD = { SymbolAddress, SymbolSize };
206 M.emplace_back(SD, SymbolName);
207 return std::error_code();
208 }
209
210 // Return true if this is a 32-bit x86 PE COFF module.
isWin32Module() const211 bool SymbolizableObjectFile::isWin32Module() const {
212 auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
213 return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
214 }
215
getModulePreferredBase() const216 uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
217 if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
218 return CoffObject->getImageBase();
219 return 0;
220 }
221
getNameFromSymbolTable(SymbolRef::Type Type,uint64_t Address,std::string & Name,uint64_t & Addr,uint64_t & Size) const222 bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
223 uint64_t Address,
224 std::string &Name,
225 uint64_t &Addr,
226 uint64_t &Size) const {
227 const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects;
228 std::pair<SymbolDesc, StringRef> SD{{Address, UINT64_C(-1)}, StringRef()};
229 auto SymbolIterator = llvm::upper_bound(Symbols, SD);
230 if (SymbolIterator == Symbols.begin())
231 return false;
232 --SymbolIterator;
233 if (SymbolIterator->first.Size != 0 &&
234 SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
235 return false;
236 Name = SymbolIterator->second.str();
237 Addr = SymbolIterator->first.Addr;
238 Size = SymbolIterator->first.Size;
239 return true;
240 }
241
shouldOverrideWithSymbolTable(FunctionNameKind FNKind,bool UseSymbolTable) const242 bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
243 FunctionNameKind FNKind, bool UseSymbolTable) const {
244 // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives
245 // better answers for linkage names than the DIContext. Otherwise, we are
246 // probably using PEs and PDBs, and we shouldn't do the override. PE files
247 // generally only contain the names of exported symbols.
248 return FNKind == FunctionNameKind::LinkageName && UseSymbolTable &&
249 isa<DWARFContext>(DebugInfoContext.get());
250 }
251
252 DILineInfo
symbolizeCode(object::SectionedAddress ModuleOffset,FunctionNameKind FNKind,bool UseSymbolTable) const253 SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset,
254 FunctionNameKind FNKind,
255 bool UseSymbolTable) const {
256 if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
257 ModuleOffset.SectionIndex =
258 getModuleSectionIndexForAddress(ModuleOffset.Address);
259 DILineInfo LineInfo = DebugInfoContext->getLineInfoForAddress(
260 ModuleOffset, getDILineInfoSpecifier(FNKind));
261
262 // Override function name from symbol table if necessary.
263 if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
264 std::string FunctionName;
265 uint64_t Start, Size;
266 if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
267 FunctionName, Start, Size)) {
268 LineInfo.FunctionName = FunctionName;
269 }
270 }
271 return LineInfo;
272 }
273
symbolizeInlinedCode(object::SectionedAddress ModuleOffset,FunctionNameKind FNKind,bool UseSymbolTable) const274 DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
275 object::SectionedAddress ModuleOffset, FunctionNameKind FNKind,
276 bool UseSymbolTable) const {
277 if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
278 ModuleOffset.SectionIndex =
279 getModuleSectionIndexForAddress(ModuleOffset.Address);
280 DIInliningInfo InlinedContext = DebugInfoContext->getInliningInfoForAddress(
281 ModuleOffset, getDILineInfoSpecifier(FNKind));
282
283 // Make sure there is at least one frame in context.
284 if (InlinedContext.getNumberOfFrames() == 0)
285 InlinedContext.addFrame(DILineInfo());
286
287 // Override the function name in lower frame with name from symbol table.
288 if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
289 std::string FunctionName;
290 uint64_t Start, Size;
291 if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
292 FunctionName, Start, Size)) {
293 InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
294 ->FunctionName = FunctionName;
295 }
296 }
297
298 return InlinedContext;
299 }
300
symbolizeData(object::SectionedAddress ModuleOffset) const301 DIGlobal SymbolizableObjectFile::symbolizeData(
302 object::SectionedAddress ModuleOffset) const {
303 DIGlobal Res;
304 getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name,
305 Res.Start, Res.Size);
306 return Res;
307 }
308
symbolizeFrame(object::SectionedAddress ModuleOffset) const309 std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
310 object::SectionedAddress ModuleOffset) const {
311 if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
312 ModuleOffset.SectionIndex =
313 getModuleSectionIndexForAddress(ModuleOffset.Address);
314 return DebugInfoContext->getLocalsForAddress(ModuleOffset);
315 }
316
317 /// Search for the first occurence of specified Address in ObjectFile.
getModuleSectionIndexForAddress(uint64_t Address) const318 uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
319 uint64_t Address) const {
320
321 for (SectionRef Sec : Module->sections()) {
322 if (!Sec.isText() || Sec.isVirtual())
323 continue;
324
325 if (Address >= Sec.getAddress() &&
326 Address < Sec.getAddress() + Sec.getSize())
327 return Sec.getIndex();
328 }
329
330 return object::SectionedAddress::UndefSection;
331 }
332