1 //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "FileAnalysis.h"
11 #include "GraphBuilder.h"
12
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCInstPrinter.h"
20 #include "llvm/MC/MCInstrAnalysis.h"
21 #include "llvm/MC/MCInstrDesc.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCObjectFileInfo.h"
24 #include "llvm/MC/MCRegisterInfo.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/Object/Binary.h"
27 #include "llvm/Object/COFF.h"
28 #include "llvm/Object/ELFObjectFile.h"
29 #include "llvm/Object/ObjectFile.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Error.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/TargetSelect.h"
36 #include "llvm/Support/raw_ostream.h"
37
38
39 using Instr = llvm::cfi_verify::FileAnalysis::Instr;
40 using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer;
41
42 namespace llvm {
43 namespace cfi_verify {
44
45 bool IgnoreDWARFFlag;
46
47 static cl::opt<bool, true> IgnoreDWARFArg(
48 "ignore-dwarf",
49 cl::desc(
50 "Ignore all DWARF data. This relaxes the requirements for all "
51 "statically linked libraries to have been compiled with '-g', but "
52 "will result in false positives for 'CFI unprotected' instructions."),
53 cl::location(IgnoreDWARFFlag), cl::init(false));
54
stringCFIProtectionStatus(CFIProtectionStatus Status)55 StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) {
56 switch (Status) {
57 case CFIProtectionStatus::PROTECTED:
58 return "PROTECTED";
59 case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF:
60 return "FAIL_NOT_INDIRECT_CF";
61 case CFIProtectionStatus::FAIL_ORPHANS:
62 return "FAIL_ORPHANS";
63 case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH:
64 return "FAIL_BAD_CONDITIONAL_BRANCH";
65 case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED:
66 return "FAIL_REGISTER_CLOBBERED";
67 case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION:
68 return "FAIL_INVALID_INSTRUCTION";
69 }
70 llvm_unreachable("Attempted to stringify an unknown enum value.");
71 }
72
Create(StringRef Filename)73 Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
74 // Open the filename provided.
75 Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
76 object::createBinary(Filename);
77 if (!BinaryOrErr)
78 return BinaryOrErr.takeError();
79
80 // Construct the object and allow it to take ownership of the binary.
81 object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get());
82 FileAnalysis Analysis(std::move(Binary));
83
84 Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
85 if (!Analysis.Object)
86 return make_error<UnsupportedDisassembly>("Failed to cast object");
87
88 switch (Analysis.Object->getArch()) {
89 case Triple::x86:
90 case Triple::x86_64:
91 case Triple::aarch64:
92 case Triple::aarch64_be:
93 break;
94 default:
95 return make_error<UnsupportedDisassembly>("Unsupported architecture.");
96 }
97
98 Analysis.ObjectTriple = Analysis.Object->makeTriple();
99 Analysis.Features = Analysis.Object->getFeatures();
100
101 // Init the rest of the object.
102 if (auto InitResponse = Analysis.initialiseDisassemblyMembers())
103 return std::move(InitResponse);
104
105 if (auto SectionParseResponse = Analysis.parseCodeSections())
106 return std::move(SectionParseResponse);
107
108 return std::move(Analysis);
109 }
110
FileAnalysis(object::OwningBinary<object::Binary> Binary)111 FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
112 : Binary(std::move(Binary)) {}
113
FileAnalysis(const Triple & ObjectTriple,const SubtargetFeatures & Features)114 FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
115 const SubtargetFeatures &Features)
116 : ObjectTriple(ObjectTriple), Features(Features) {}
117
118 const Instr *
getPrevInstructionSequential(const Instr & InstrMeta) const119 FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
120 std::map<uint64_t, Instr>::const_iterator KV =
121 Instructions.find(InstrMeta.VMAddress);
122 if (KV == Instructions.end() || KV == Instructions.begin())
123 return nullptr;
124
125 if (!(--KV)->second.Valid)
126 return nullptr;
127
128 return &KV->second;
129 }
130
131 const Instr *
getNextInstructionSequential(const Instr & InstrMeta) const132 FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
133 std::map<uint64_t, Instr>::const_iterator KV =
134 Instructions.find(InstrMeta.VMAddress);
135 if (KV == Instructions.end() || ++KV == Instructions.end())
136 return nullptr;
137
138 if (!KV->second.Valid)
139 return nullptr;
140
141 return &KV->second;
142 }
143
usesRegisterOperand(const Instr & InstrMeta) const144 bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
145 for (const auto &Operand : InstrMeta.Instruction) {
146 if (Operand.isReg())
147 return true;
148 }
149 return false;
150 }
151
getInstruction(uint64_t Address) const152 const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
153 const auto &InstrKV = Instructions.find(Address);
154 if (InstrKV == Instructions.end())
155 return nullptr;
156
157 return &InstrKV->second;
158 }
159
getInstructionOrDie(uint64_t Address) const160 const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
161 const auto &InstrKV = Instructions.find(Address);
162 assert(InstrKV != Instructions.end() && "Address doesn't exist.");
163 return InstrKV->second;
164 }
165
isCFITrap(const Instr & InstrMeta) const166 bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const {
167 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
168 return InstrDesc.isTrap();
169 }
170
canFallThrough(const Instr & InstrMeta) const171 bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const {
172 if (!InstrMeta.Valid)
173 return false;
174
175 if (isCFITrap(InstrMeta))
176 return false;
177
178 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
179 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo))
180 return InstrDesc.isConditionalBranch();
181
182 return true;
183 }
184
185 const Instr *
getDefiniteNextInstruction(const Instr & InstrMeta) const186 FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const {
187 if (!InstrMeta.Valid)
188 return nullptr;
189
190 if (isCFITrap(InstrMeta))
191 return nullptr;
192
193 const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
194 const Instr *NextMetaPtr;
195 if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) {
196 if (InstrDesc.isConditionalBranch())
197 return nullptr;
198
199 uint64_t Target;
200 if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
201 InstrMeta.InstructionSize, Target))
202 return nullptr;
203
204 NextMetaPtr = getInstruction(Target);
205 } else {
206 NextMetaPtr =
207 getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize);
208 }
209
210 if (!NextMetaPtr || !NextMetaPtr->Valid)
211 return nullptr;
212
213 return NextMetaPtr;
214 }
215
216 std::set<const Instr *>
getDirectControlFlowXRefs(const Instr & InstrMeta) const217 FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const {
218 std::set<const Instr *> CFCrossReferences;
219 const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta);
220
221 if (PrevInstruction && canFallThrough(*PrevInstruction))
222 CFCrossReferences.insert(PrevInstruction);
223
224 const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress);
225 if (TargetRefsKV == StaticBranchTargetings.end())
226 return CFCrossReferences;
227
228 for (uint64_t SourceInstrAddress : TargetRefsKV->second) {
229 const auto &SourceInstrKV = Instructions.find(SourceInstrAddress);
230 if (SourceInstrKV == Instructions.end()) {
231 errs() << "Failed to find source instruction at address "
232 << format_hex(SourceInstrAddress, 2)
233 << " for the cross-reference to instruction at address "
234 << format_hex(InstrMeta.VMAddress, 2) << ".\n";
235 continue;
236 }
237
238 CFCrossReferences.insert(&SourceInstrKV->second);
239 }
240
241 return CFCrossReferences;
242 }
243
getIndirectInstructions() const244 const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const {
245 return IndirectInstructions;
246 }
247
getRegisterInfo() const248 const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
249 return RegisterInfo.get();
250 }
251
getMCInstrInfo() const252 const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
253
getMCInstrAnalysis() const254 const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
255 return MIA.get();
256 }
257
symbolizeInlinedCode(uint64_t Address)258 Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) {
259 assert(Symbolizer != nullptr && "Symbolizer is invalid.");
260 return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address);
261 }
262
263 CFIProtectionStatus
validateCFIProtection(const GraphResult & Graph) const264 FileAnalysis::validateCFIProtection(const GraphResult &Graph) const {
265 const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress);
266 if (!InstrMetaPtr)
267 return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION;
268
269 const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode());
270 if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo))
271 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
272
273 if (!usesRegisterOperand(*InstrMetaPtr))
274 return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
275
276 if (!Graph.OrphanedNodes.empty())
277 return CFIProtectionStatus::FAIL_ORPHANS;
278
279 for (const auto &BranchNode : Graph.ConditionalBranchNodes) {
280 if (!BranchNode.CFIProtection)
281 return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH;
282 }
283
284 if (indirectCFOperandClobber(Graph) != Graph.BaseAddress)
285 return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED;
286
287 return CFIProtectionStatus::PROTECTED;
288 }
289
indirectCFOperandClobber(const GraphResult & Graph) const290 uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const {
291 assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty.");
292
293 // Get the set of registers we must check to ensure they're not clobbered.
294 const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress);
295 DenseSet<unsigned> RegisterNumbers;
296 for (const auto &Operand : IndirectCF.Instruction) {
297 if (Operand.isReg())
298 RegisterNumbers.insert(Operand.getReg());
299 }
300 assert(RegisterNumbers.size() && "Zero register operands on indirect CF.");
301
302 // Now check all branches to indirect CFs and ensure no clobbering happens.
303 for (const auto &Branch : Graph.ConditionalBranchNodes) {
304 uint64_t Node;
305 if (Branch.IndirectCFIsOnTargetPath)
306 Node = Branch.Target;
307 else
308 Node = Branch.Fallthrough;
309
310 // Some architectures (e.g., AArch64) cannot load in an indirect branch, so
311 // we allow them one load.
312 bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad();
313
314 // We walk backwards from the indirect CF. It is the last node returned by
315 // Graph.flattenAddress, so we skip it since we already handled it.
316 DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers;
317 std::vector<uint64_t> Nodes = Graph.flattenAddress(Node);
318 for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) {
319 Node = *I;
320 const Instr &NodeInstr = getInstructionOrDie(Node);
321 const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode());
322
323 for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end();
324 RI != RE; ++RI) {
325 unsigned RegNum = *RI;
326 if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum,
327 *RegisterInfo)) {
328 if (!canLoad || !InstrDesc.mayLoad())
329 return Node;
330 canLoad = false;
331 CurRegisterNumbers.erase(RI);
332 // Add the registers this load reads to those we check for clobbers.
333 for (unsigned i = InstrDesc.getNumDefs(),
334 e = InstrDesc.getNumOperands(); i != e; i++) {
335 const auto Operand = NodeInstr.Instruction.getOperand(i);
336 if (Operand.isReg())
337 CurRegisterNumbers.insert(Operand.getReg());
338 }
339 break;
340 }
341 }
342 }
343 }
344
345 return Graph.BaseAddress;
346 }
347
printInstruction(const Instr & InstrMeta,raw_ostream & OS) const348 void FileAnalysis::printInstruction(const Instr &InstrMeta,
349 raw_ostream &OS) const {
350 Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get());
351 }
352
initialiseDisassemblyMembers()353 Error FileAnalysis::initialiseDisassemblyMembers() {
354 std::string TripleName = ObjectTriple.getTriple();
355 ArchName = "";
356 MCPU = "";
357 std::string ErrorString;
358
359 Symbolizer.reset(new LLVMSymbolizer());
360
361 ObjectTarget =
362 TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
363 if (!ObjectTarget)
364 return make_error<UnsupportedDisassembly>(
365 (Twine("Couldn't find target \"") + ObjectTriple.getTriple() +
366 "\", failed with error: " + ErrorString)
367 .str());
368
369 RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
370 if (!RegisterInfo)
371 return make_error<UnsupportedDisassembly>(
372 "Failed to initialise RegisterInfo.");
373
374 AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName));
375 if (!AsmInfo)
376 return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo.");
377
378 SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
379 TripleName, MCPU, Features.getString()));
380 if (!SubtargetInfo)
381 return make_error<UnsupportedDisassembly>(
382 "Failed to initialise SubtargetInfo.");
383
384 MII.reset(ObjectTarget->createMCInstrInfo());
385 if (!MII)
386 return make_error<UnsupportedDisassembly>("Failed to initialise MII.");
387
388 Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI));
389
390 Disassembler.reset(
391 ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
392
393 if (!Disassembler)
394 return make_error<UnsupportedDisassembly>(
395 "No disassembler available for target");
396
397 MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
398
399 Printer.reset(ObjectTarget->createMCInstPrinter(
400 ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
401 *RegisterInfo));
402
403 return Error::success();
404 }
405
parseCodeSections()406 Error FileAnalysis::parseCodeSections() {
407 if (!IgnoreDWARFFlag) {
408 std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object);
409 if (!DWARF)
410 return make_error<StringError>("Could not create DWARF information.",
411 inconvertibleErrorCode());
412
413 bool LineInfoValid = false;
414
415 for (auto &Unit : DWARF->compile_units()) {
416 const auto &LineTable = DWARF->getLineTableForUnit(Unit.get());
417 if (LineTable && !LineTable->Rows.empty()) {
418 LineInfoValid = true;
419 break;
420 }
421 }
422
423 if (!LineInfoValid)
424 return make_error<StringError>(
425 "DWARF line information missing. Did you compile with '-g'?",
426 inconvertibleErrorCode());
427 }
428
429 for (const object::SectionRef &Section : Object->sections()) {
430 // Ensure only executable sections get analysed.
431 if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR))
432 continue;
433
434 StringRef SectionContents;
435 if (Section.getContents(SectionContents))
436 return make_error<StringError>("Failed to retrieve section contents",
437 inconvertibleErrorCode());
438
439 ArrayRef<uint8_t> SectionBytes((const uint8_t *)SectionContents.data(),
440 Section.getSize());
441 parseSectionContents(SectionBytes, Section.getAddress());
442 }
443 return Error::success();
444 }
445
parseSectionContents(ArrayRef<uint8_t> SectionBytes,uint64_t SectionAddress)446 void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
447 uint64_t SectionAddress) {
448 assert(Symbolizer && "Symbolizer is uninitialised.");
449 MCInst Instruction;
450 Instr InstrMeta;
451 uint64_t InstructionSize;
452
453 for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
454 bool ValidInstruction =
455 Disassembler->getInstruction(Instruction, InstructionSize,
456 SectionBytes.drop_front(Byte), 0, nulls(),
457 outs()) == MCDisassembler::Success;
458
459 Byte += InstructionSize;
460
461 uint64_t VMAddress = SectionAddress + Byte - InstructionSize;
462 InstrMeta.Instruction = Instruction;
463 InstrMeta.VMAddress = VMAddress;
464 InstrMeta.InstructionSize = InstructionSize;
465 InstrMeta.Valid = ValidInstruction;
466
467 addInstruction(InstrMeta);
468
469 if (!ValidInstruction)
470 continue;
471
472 // Skip additional parsing for instructions that do not affect the control
473 // flow.
474 const auto &InstrDesc = MII->get(Instruction.getOpcode());
475 if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
476 continue;
477
478 uint64_t Target;
479 if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) {
480 // If the target can be evaluated, it's not indirect.
481 StaticBranchTargetings[Target].push_back(VMAddress);
482 continue;
483 }
484
485 if (!usesRegisterOperand(InstrMeta))
486 continue;
487
488 if (InstrDesc.isReturn())
489 continue;
490
491 // Check if this instruction exists in the range of the DWARF metadata.
492 if (!IgnoreDWARFFlag) {
493 auto LineInfo =
494 Symbolizer->symbolizeCode(Object->getFileName(), VMAddress);
495 if (!LineInfo) {
496 handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) {
497 errs() << "Symbolizer failed to get line: " << E.message() << "\n";
498 });
499 continue;
500 }
501
502 if (LineInfo->FileName == "<invalid>")
503 continue;
504 }
505
506 IndirectInstructions.insert(VMAddress);
507 }
508 }
509
addInstruction(const Instr & Instruction)510 void FileAnalysis::addInstruction(const Instr &Instruction) {
511 const auto &KV =
512 Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction));
513 if (!KV.second) {
514 errs() << "Failed to add instruction at address "
515 << format_hex(Instruction.VMAddress, 2)
516 << ": Instruction at this address already exists.\n";
517 exit(EXIT_FAILURE);
518 }
519 }
520
UnsupportedDisassembly(StringRef Text)521 UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {}
522
523 char UnsupportedDisassembly::ID;
log(raw_ostream & OS) const524 void UnsupportedDisassembly::log(raw_ostream &OS) const {
525 OS << "Could not initialise disassembler: " << Text;
526 }
527
convertToErrorCode() const528 std::error_code UnsupportedDisassembly::convertToErrorCode() const {
529 return std::error_code();
530 }
531
532 } // namespace cfi_verify
533 } // namespace llvm
534