1 //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "ProfiledBinary.h"
10 #include "ErrorHandling.h"
11 #include "llvm/ADT/Triple.h"
12 #include "llvm/Demangle/Demangle.h"
13 #include "llvm/Support/CommandLine.h"
14 #include "llvm/Support/Format.h"
15 #include "llvm/Support/TargetRegistry.h"
16 #include "llvm/Support/TargetSelect.h"
17
18 #define DEBUG_TYPE "load-binary"
19
20 using namespace llvm;
21 using namespace sampleprof;
22
23 static cl::opt<bool> ShowDisassembly("show-disassembly", cl::ReallyHidden,
24 cl::init(false), cl::ZeroOrMore,
25 cl::desc("Print disassembled code."));
26
27 static cl::opt<bool> ShowSourceLocations("show-source-locations",
28 cl::ReallyHidden, cl::init(false),
29 cl::ZeroOrMore,
30 cl::desc("Print source locations."));
31
32 namespace llvm {
33 namespace sampleprof {
34
getTarget(const ObjectFile * Obj)35 static const Target *getTarget(const ObjectFile *Obj) {
36 Triple TheTriple = Obj->makeTriple();
37 std::string Error;
38 std::string ArchName;
39 const Target *TheTarget =
40 TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
41 if (!TheTarget)
42 exitWithError(Error, Obj->getFileName());
43 return TheTarget;
44 }
45
46 template <class ELFT>
getELFImageLMAForSec(const ELFFile<ELFT> & Obj,const object::ELFSectionRef & Sec,StringRef FileName)47 static uint64_t getELFImageLMAForSec(const ELFFile<ELFT> &Obj,
48 const object::ELFSectionRef &Sec,
49 StringRef FileName) {
50 // Search for a PT_LOAD segment containing the requested section. Return this
51 // segment's p_addr as the image load address for the section.
52 const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
53 for (const typename ELFT::Phdr &Phdr : PhdrRange)
54 if ((Phdr.p_type == ELF::PT_LOAD) && (Phdr.p_vaddr <= Sec.getAddress()) &&
55 (Phdr.p_vaddr + Phdr.p_memsz > Sec.getAddress()))
56 // Segments will always be loaded at a page boundary.
57 return Phdr.p_paddr & ~(Phdr.p_align - 1U);
58 return 0;
59 }
60
61 // Get the image load address for a specific section. Note that an image is
62 // loaded by segments (a group of sections) and segments may not be consecutive
63 // in memory.
getELFImageLMAForSec(const object::ELFSectionRef & Sec)64 static uint64_t getELFImageLMAForSec(const object::ELFSectionRef &Sec) {
65 if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Sec.getObject()))
66 return getELFImageLMAForSec(ELFObj->getELFFile(), Sec,
67 ELFObj->getFileName());
68 else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Sec.getObject()))
69 return getELFImageLMAForSec(ELFObj->getELFFile(), Sec,
70 ELFObj->getFileName());
71 else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Sec.getObject()))
72 return getELFImageLMAForSec(ELFObj->getELFFile(), Sec,
73 ELFObj->getFileName());
74 const auto *ELFObj = cast<ELF64BEObjectFile>(Sec.getObject());
75 return getELFImageLMAForSec(ELFObj->getELFFile(), Sec, ELFObj->getFileName());
76 }
77
load()78 void ProfiledBinary::load() {
79 // Attempt to open the binary.
80 OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
81 Binary &Binary = *OBinary.getBinary();
82
83 auto *Obj = dyn_cast<ELFObjectFileBase>(&Binary);
84 if (!Obj)
85 exitWithError("not a valid Elf image", Path);
86
87 TheTriple = Obj->makeTriple();
88 // Current only support X86
89 if (!TheTriple.isX86())
90 exitWithError("unsupported target", TheTriple.getTriple());
91 LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
92
93 // Find the preferred base address for text sections.
94 setPreferredBaseAddress(Obj);
95
96 // Disassemble the text sections.
97 disassemble(Obj);
98
99 // Use function start and return address to infer prolog and epilog
100 ProEpilogTracker.inferPrologOffsets(FuncStartAddrMap);
101 ProEpilogTracker.inferEpilogOffsets(RetAddrs);
102
103 // TODO: decode other sections.
104
105 return;
106 }
107
inlineContextEqual(uint64_t Address1,uint64_t Address2) const108 bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
109 uint64_t Address2) const {
110 uint64_t Offset1 = virtualAddrToOffset(Address1);
111 uint64_t Offset2 = virtualAddrToOffset(Address2);
112 const FrameLocationStack &Context1 = getFrameLocationStack(Offset1);
113 const FrameLocationStack &Context2 = getFrameLocationStack(Offset2);
114 if (Context1.size() != Context2.size())
115 return false;
116
117 // The leaf frame contains location within the leaf, and it
118 // needs to be remove that as it's not part of the calling context
119 return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
120 Context2.begin(), Context2.begin() + Context2.size() - 1);
121 }
122
123 std::string
getExpandedContextStr(const std::list<uint64_t> & Stack) const124 ProfiledBinary::getExpandedContextStr(const std::list<uint64_t> &Stack) const {
125 std::string ContextStr;
126 SmallVector<std::string, 8> ContextVec;
127 // Process from frame root to leaf
128 for (auto Iter = Stack.rbegin(); Iter != Stack.rend(); Iter++) {
129 uint64_t Offset = virtualAddrToOffset(*Iter);
130 const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset);
131 for (const auto &Loc : ExpandedContext) {
132 ContextVec.push_back(getCallSite(Loc));
133 }
134 }
135
136 assert(ContextVec.size() && "Context length should be at least 1");
137
138 std::ostringstream OContextStr;
139 for (uint32_t I = 0; I < (uint32_t)ContextVec.size(); I++) {
140 if (OContextStr.str().size()) {
141 OContextStr << " @ ";
142 }
143
144 if (I == ContextVec.size() - 1) {
145 // Only keep the function name for the leaf frame
146 StringRef Ref(ContextVec[I]);
147 OContextStr << Ref.split(":").first.str();
148 } else {
149 OContextStr << ContextVec[I];
150 }
151 }
152
153 return OContextStr.str();
154 }
155
setPreferredBaseAddress(const ELFObjectFileBase * Obj)156 void ProfiledBinary::setPreferredBaseAddress(const ELFObjectFileBase *Obj) {
157 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
158 SI != SE; ++SI) {
159 const SectionRef &Section = *SI;
160 if (Section.isText()) {
161 PreferredBaseAddress = getELFImageLMAForSec(Section);
162 return;
163 }
164 }
165 exitWithError("no text section found", Obj->getFileName());
166 }
167
dissassembleSymbol(std::size_t SI,ArrayRef<uint8_t> Bytes,SectionSymbolsTy & Symbols,const SectionRef & Section)168 bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
169 SectionSymbolsTy &Symbols,
170 const SectionRef &Section) {
171
172 std::size_t SE = Symbols.size();
173 uint64_t SectionOffset = Section.getAddress() - PreferredBaseAddress;
174 uint64_t SectSize = Section.getSize();
175 uint64_t StartOffset = Symbols[SI].Addr - PreferredBaseAddress;
176 uint64_t EndOffset = (SI + 1 < SE)
177 ? Symbols[SI + 1].Addr - PreferredBaseAddress
178 : SectionOffset + SectSize;
179 if (StartOffset >= EndOffset)
180 return true;
181
182 std::string &&SymbolName = Symbols[SI].Name.str();
183 if (ShowDisassembly)
184 outs() << '<' << SymbolName << ">:\n";
185
186 uint64_t Offset = StartOffset;
187 while (Offset < EndOffset) {
188 MCInst Inst;
189 uint64_t Size;
190 // Disassemble an instruction.
191 if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset),
192 Offset + PreferredBaseAddress, nulls()))
193 return false;
194
195 if (ShowDisassembly) {
196 outs() << format("%8" PRIx64 ":", Offset);
197 size_t Start = outs().tell();
198 IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
199 if (ShowSourceLocations) {
200 unsigned Cur = outs().tell() - Start;
201 if (Cur < 40)
202 outs().indent(40 - Cur);
203 InstructionPointer Inst(this, Offset);
204 outs() << getReversedLocWithContext(symbolize(Inst));
205 }
206 outs() << "\n";
207 }
208
209 const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
210
211 // Populate a vector of the symbolized callsite at this location
212 InstructionPointer IP(this, Offset);
213 Offset2LocStackMap[Offset] = symbolize(IP, true);
214
215 // Populate address maps.
216 CodeAddrs.push_back(Offset);
217 if (MCDesc.isCall())
218 CallAddrs.insert(Offset);
219 else if (MCDesc.isReturn())
220 RetAddrs.insert(Offset);
221
222 Offset += Size;
223 }
224
225 if (ShowDisassembly)
226 outs() << "\n";
227
228 FuncStartAddrMap[StartOffset] = Symbols[SI].Name.str();
229 return true;
230 }
231
setUpDisassembler(const ELFObjectFileBase * Obj)232 void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
233 const Target *TheTarget = getTarget(Obj);
234 std::string TripleName = TheTriple.getTriple();
235 StringRef FileName = Obj->getFileName();
236
237 MRI.reset(TheTarget->createMCRegInfo(TripleName));
238 if (!MRI)
239 exitWithError("no register info for target " + TripleName, FileName);
240
241 MCTargetOptions MCOptions;
242 AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
243 if (!AsmInfo)
244 exitWithError("no assembly info for target " + TripleName, FileName);
245
246 SubtargetFeatures Features = Obj->getFeatures();
247 STI.reset(
248 TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString()));
249 if (!STI)
250 exitWithError("no subtarget info for target " + TripleName, FileName);
251
252 MII.reset(TheTarget->createMCInstrInfo());
253 if (!MII)
254 exitWithError("no instruction info for target " + TripleName, FileName);
255
256 MCObjectFileInfo MOFI;
257 MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI);
258 MOFI.InitMCObjectFileInfo(Triple(TripleName), false, Ctx);
259 DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
260 if (!DisAsm)
261 exitWithError("no disassembler for target " + TripleName, FileName);
262
263 MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
264
265 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
266 IPrinter.reset(TheTarget->createMCInstPrinter(
267 Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
268 IPrinter->setPrintBranchImmAsAddress(true);
269 }
270
disassemble(const ELFObjectFileBase * Obj)271 void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
272 // Set up disassembler and related components.
273 setUpDisassembler(Obj);
274
275 // Create a mapping from virtual address to symbol name. The symbols in text
276 // sections are the candidates to dissassemble.
277 std::map<SectionRef, SectionSymbolsTy> AllSymbols;
278 StringRef FileName = Obj->getFileName();
279 for (const SymbolRef &Symbol : Obj->symbols()) {
280 const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
281 const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
282 section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
283 if (SecI != Obj->section_end())
284 AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
285 }
286
287 // Sort all the symbols. Use a stable sort to stabilize the output.
288 for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
289 stable_sort(SecSyms.second);
290
291 if (ShowDisassembly)
292 outs() << "\nDisassembly of " << FileName << ":\n";
293
294 // Dissassemble a text section.
295 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
296 SI != SE; ++SI) {
297 const SectionRef &Section = *SI;
298 if (!Section.isText())
299 continue;
300
301 uint64_t ImageLoadAddr = PreferredBaseAddress;
302 uint64_t SectionOffset = Section.getAddress() - ImageLoadAddr;
303 uint64_t SectSize = Section.getSize();
304 if (!SectSize)
305 continue;
306
307 // Register the text section.
308 TextSections.insert({SectionOffset, SectSize});
309
310 if (ShowDisassembly) {
311 StringRef SectionName = unwrapOrError(Section.getName(), FileName);
312 outs() << "\nDisassembly of section " << SectionName;
313 outs() << " [" << format("0x%" PRIx64, SectionOffset) << ", "
314 << format("0x%" PRIx64, SectionOffset + SectSize) << "]:\n\n";
315 }
316
317 // Get the section data.
318 ArrayRef<uint8_t> Bytes =
319 arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
320
321 // Get the list of all the symbols in this section.
322 SectionSymbolsTy &Symbols = AllSymbols[Section];
323
324 // Disassemble symbol by symbol.
325 for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
326 if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
327 exitWithError("disassembling error", FileName);
328 }
329 }
330 }
331
setupSymbolizer()332 void ProfiledBinary::setupSymbolizer() {
333 symbolize::LLVMSymbolizer::Options SymbolizerOpts;
334 SymbolizerOpts.PrintFunctions =
335 DILineInfoSpecifier::FunctionNameKind::LinkageName;
336 SymbolizerOpts.Demangle = false;
337 SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
338 SymbolizerOpts.UseSymbolTable = false;
339 SymbolizerOpts.RelativeAddresses = false;
340 Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
341 }
342
symbolize(const InstructionPointer & IP,bool UseCanonicalFnName)343 FrameLocationStack ProfiledBinary::symbolize(const InstructionPointer &IP,
344 bool UseCanonicalFnName) {
345 assert(this == IP.Binary &&
346 "Binary should only symbolize its own instruction");
347 auto Addr = object::SectionedAddress{IP.Offset + PreferredBaseAddress,
348 object::SectionedAddress::UndefSection};
349 DIInliningInfo InlineStack =
350 unwrapOrError(Symbolizer->symbolizeInlinedCode(Path, Addr), getName());
351
352 FrameLocationStack CallStack;
353
354 for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
355 const auto &CallerFrame = InlineStack.getFrame(I);
356 if (CallerFrame.FunctionName == "<invalid>")
357 break;
358 StringRef FunctionName(CallerFrame.FunctionName);
359 if (UseCanonicalFnName)
360 FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
361 LineLocation Line(CallerFrame.Line - CallerFrame.StartLine,
362 CallerFrame.Discriminator);
363 FrameLocation Callsite(FunctionName.str(), Line);
364 CallStack.push_back(Callsite);
365 }
366
367 return CallStack;
368 }
369
InstructionPointer(ProfiledBinary * Binary,uint64_t Address,bool RoundToNext)370 InstructionPointer::InstructionPointer(ProfiledBinary *Binary, uint64_t Address,
371 bool RoundToNext)
372 : Binary(Binary), Address(Address) {
373 Index = Binary->getIndexForAddr(Address);
374 if (RoundToNext) {
375 // we might get address which is not the code
376 // it should round to the next valid address
377 this->Address = Binary->getAddressforIndex(Index);
378 }
379 }
380
advance()381 void InstructionPointer::advance() {
382 Index++;
383 Address = Binary->getAddressforIndex(Index);
384 }
385
backward()386 void InstructionPointer::backward() {
387 Index--;
388 Address = Binary->getAddressforIndex(Index);
389 }
390
update(uint64_t Addr)391 void InstructionPointer::update(uint64_t Addr) {
392 Address = Addr;
393 Index = Binary->getIndexForAddr(Address);
394 }
395
396 } // end namespace sampleprof
397 } // end namespace llvm
398