//===------- ShadowCallStack.cpp - Shadow Call Stack pass -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The ShadowCallStack pass instruments function prologs/epilogs to check that
// the return address has not been corrupted during the execution of the
// function. The return address is stored in a 'shadow call stack' addressed
// using the %gs segment register.
//
//===----------------------------------------------------------------------===//
16
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"

// std::bitset and size_t are used directly below; include their headers
// explicitly instead of relying on transitive includes.
#include <bitset>
#include <cstddef>
31
32 using namespace llvm;
33
34 namespace llvm {
35 void initializeShadowCallStackPass(PassRegistry &);
36 }
37
38 namespace {
39
40 class ShadowCallStack : public MachineFunctionPass {
41 public:
42 static char ID;
43
ShadowCallStack()44 ShadowCallStack() : MachineFunctionPass(ID) {
45 initializeShadowCallStackPass(*PassRegistry::getPassRegistry());
46 }
47
getAnalysisUsage(AnalysisUsage & AU) const48 void getAnalysisUsage(AnalysisUsage &AU) const override {
49 MachineFunctionPass::getAnalysisUsage(AU);
50 }
51
52 bool runOnMachineFunction(MachineFunction &Fn) override;
53
54 private:
55 // Do not instrument leaf functions with this many or fewer instructions. The
56 // shadow call stack instrumented prolog/epilog are slightly race-y reading
57 // and checking the saved return address, so it is better to not instrument
58 // functions that have fewer instructions than the instrumented prolog/epilog
59 // race.
60 static const size_t SkipLeafInstructions = 3;
61 };
62
63 char ShadowCallStack::ID = 0;
64 } // end anonymous namespace.
65
66 static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
67 MachineBasicBlock &MBB, const DebugLoc &DL);
68 static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
69 MachineBasicBlock &MBB, const DebugLoc &DL,
70 MCPhysReg FreeRegister);
71
72 static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
73 MachineInstr &MI, MachineBasicBlock &TrapBB);
74 static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
75 MachineInstr &MI, MachineBasicBlock &TrapBB,
76 MCPhysReg FreeRegister);
77 // Generate a longer epilog that only uses r10 when a tailcall branches to r11.
78 static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
79 MachineInstr &MI, MachineBasicBlock &TrapBB);
80
81 // Helper function to add ModR/M references for [Seg: Reg + Offset] memory
82 // accesses
83 static inline const MachineInstrBuilder &
addSegmentedMem(const MachineInstrBuilder & MIB,MCPhysReg Seg,MCPhysReg Reg,int Offset=0)84 addSegmentedMem(const MachineInstrBuilder &MIB, MCPhysReg Seg, MCPhysReg Reg,
85 int Offset = 0) {
86 return MIB.addReg(Reg).addImm(1).addReg(0).addImm(Offset).addReg(Seg);
87 }
88
addProlog(MachineFunction & Fn,const TargetInstrInfo * TII,MachineBasicBlock & MBB,const DebugLoc & DL)89 static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII,
90 MachineBasicBlock &MBB, const DebugLoc &DL) {
91 const MCPhysReg ReturnReg = X86::R10;
92 const MCPhysReg OffsetReg = X86::R11;
93
94 auto MBBI = MBB.begin();
95 // mov r10, [rsp]
96 addDirectMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(ReturnReg),
97 X86::RSP);
98 // xor r11, r11
99 BuildMI(MBB, MBBI, DL, TII->get(X86::XOR64rr))
100 .addDef(OffsetReg)
101 .addReg(OffsetReg, RegState::Undef)
102 .addReg(OffsetReg, RegState::Undef);
103 // add QWORD [gs:r11], 8
104 addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::ADD64mi8)), X86::GS,
105 OffsetReg)
106 .addImm(8);
107 // mov r11, [gs:r11]
108 addSegmentedMem(
109 BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(OffsetReg), X86::GS,
110 OffsetReg);
111 // mov [gs:r11], r10
112 addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64mr)), X86::GS,
113 OffsetReg)
114 .addReg(ReturnReg);
115 }
116
addPrologLeaf(MachineFunction & Fn,const TargetInstrInfo * TII,MachineBasicBlock & MBB,const DebugLoc & DL,MCPhysReg FreeRegister)117 static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII,
118 MachineBasicBlock &MBB, const DebugLoc &DL,
119 MCPhysReg FreeRegister) {
120 // mov REG, [rsp]
121 addDirectMem(BuildMI(MBB, MBB.begin(), DL, TII->get(X86::MOV64rm))
122 .addDef(FreeRegister),
123 X86::RSP);
124 }
125
addEpilog(const TargetInstrInfo * TII,MachineBasicBlock & MBB,MachineInstr & MI,MachineBasicBlock & TrapBB)126 static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
127 MachineInstr &MI, MachineBasicBlock &TrapBB) {
128 const DebugLoc &DL = MI.getDebugLoc();
129
130 // xor r11, r11
131 BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
132 .addDef(X86::R11)
133 .addReg(X86::R11, RegState::Undef)
134 .addReg(X86::R11, RegState::Undef);
135 // mov r10, [gs:r11]
136 addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
137 X86::GS, X86::R11);
138 // mov r10, [gs:r10]
139 addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
140 X86::GS, X86::R10);
141 // sub QWORD [gs:r11], 8
142 // This instruction should not be moved up to avoid a signal race.
143 addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)),
144 X86::GS, X86::R11)
145 .addImm(8);
146 // cmp [rsp], r10
147 addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
148 .addReg(X86::R10);
149 // jne trap
150 BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
151 MBB.addSuccessor(&TrapBB);
152 }
153
addEpilogLeaf(const TargetInstrInfo * TII,MachineBasicBlock & MBB,MachineInstr & MI,MachineBasicBlock & TrapBB,MCPhysReg FreeRegister)154 static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
155 MachineInstr &MI, MachineBasicBlock &TrapBB,
156 MCPhysReg FreeRegister) {
157 const DebugLoc &DL = MI.getDebugLoc();
158
159 // cmp [rsp], REG
160 addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
161 .addReg(FreeRegister);
162 // jne trap
163 BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
164 MBB.addSuccessor(&TrapBB);
165 }
166
addEpilogOnlyR10(const TargetInstrInfo * TII,MachineBasicBlock & MBB,MachineInstr & MI,MachineBasicBlock & TrapBB)167 static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
168 MachineInstr &MI, MachineBasicBlock &TrapBB) {
169 const DebugLoc &DL = MI.getDebugLoc();
170
171 // xor r10, r10
172 BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr))
173 .addDef(X86::R10)
174 .addReg(X86::R10, RegState::Undef)
175 .addReg(X86::R10, RegState::Undef);
176 // mov r10, [gs:r10]
177 addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
178 X86::GS, X86::R10);
179 // mov r10, [gs:r10]
180 addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10),
181 X86::GS, X86::R10);
182 // sub QWORD [gs:0], 8
183 // This instruction should not be moved up to avoid a signal race.
184 addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)), X86::GS, 0)
185 .addImm(8);
186 // cmp [rsp], r10
187 addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP)
188 .addReg(X86::R10);
189 // jne trap
190 BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB);
191 MBB.addSuccessor(&TrapBB);
192 }
193
runOnMachineFunction(MachineFunction & Fn)194 bool ShadowCallStack::runOnMachineFunction(MachineFunction &Fn) {
195 if (!Fn.getFunction().hasFnAttribute(Attribute::ShadowCallStack) ||
196 Fn.getFunction().hasFnAttribute(Attribute::Naked))
197 return false;
198
199 if (Fn.empty() || !Fn.getRegInfo().tracksLiveness())
200 return false;
201
202 // FIXME: Skip functions that have r10 or r11 live on entry (r10 can be live
203 // on entry for parameters with the nest attribute.)
204 if (Fn.front().isLiveIn(X86::R10) || Fn.front().isLiveIn(X86::R11))
205 return false;
206
207 // FIXME: Skip functions with conditional and r10 tail calls for now.
208 bool HasReturn = false;
209 for (auto &MBB : Fn) {
210 if (MBB.empty())
211 continue;
212
213 const MachineInstr &MI = MBB.instr_back();
214 if (MI.isReturn())
215 HasReturn = true;
216
217 if (MI.isReturn() && MI.isCall()) {
218 if (MI.findRegisterUseOperand(X86::EFLAGS))
219 return false;
220 // This should only be possible on Windows 64 (see GR64_TC versus
221 // GR64_TCW64.)
222 if (MI.findRegisterUseOperand(X86::R10) ||
223 MI.hasRegisterImplicitUseOperand(X86::R10))
224 return false;
225 }
226 }
227
228 if (!HasReturn)
229 return false;
230
231 // For leaf functions:
232 // 1. Do not instrument very short functions where it would not improve that
233 // function's security.
234 // 2. Detect if there is an unused caller-saved register we can reserve to
235 // hold the return address instead of writing/reading it from the shadow
236 // call stack.
237 MCPhysReg LeafFuncRegister = X86::NoRegister;
238 if (!Fn.getFrameInfo().adjustsStack()) {
239 size_t InstructionCount = 0;
240 std::bitset<X86::NUM_TARGET_REGS> UsedRegs;
241 for (auto &MBB : Fn) {
242 for (auto &LiveIn : MBB.liveins())
243 UsedRegs.set(LiveIn.PhysReg);
244 for (auto &MI : MBB) {
245 if (!MI.isDebugValue() && !MI.isCFIInstruction() && !MI.isLabel())
246 InstructionCount++;
247 for (auto &Op : MI.operands())
248 if (Op.isReg() && Op.isDef())
249 UsedRegs.set(Op.getReg());
250 }
251 }
252
253 if (InstructionCount <= SkipLeafInstructions)
254 return false;
255
256 std::bitset<X86::NUM_TARGET_REGS> CalleeSavedRegs;
257 const MCPhysReg *CSRegs = Fn.getRegInfo().getCalleeSavedRegs();
258 for (size_t i = 0; CSRegs[i]; i++)
259 CalleeSavedRegs.set(CSRegs[i]);
260
261 const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
262 for (auto &Reg : X86::GR64_NOSPRegClass.getRegisters()) {
263 // FIXME: Optimization opportunity: spill/restore a callee-saved register
264 // if a caller-saved register is unavailable.
265 if (CalleeSavedRegs.test(Reg))
266 continue;
267
268 bool Used = false;
269 for (MCSubRegIterator SR(Reg, TRI, true); SR.isValid(); ++SR)
270 if ((Used = UsedRegs.test(*SR)))
271 break;
272
273 if (!Used) {
274 LeafFuncRegister = Reg;
275 break;
276 }
277 }
278 }
279
280 const bool LeafFuncOptimization = LeafFuncRegister != X86::NoRegister;
281 if (LeafFuncOptimization)
282 // Mark the leaf function register live-in for all MBBs except the entry MBB
283 for (auto I = ++Fn.begin(), E = Fn.end(); I != E; ++I)
284 I->addLiveIn(LeafFuncRegister);
285
286 MachineBasicBlock &MBB = Fn.front();
287 const MachineBasicBlock *NonEmpty = MBB.empty() ? MBB.getFallThrough() : &MBB;
288 const DebugLoc &DL = NonEmpty->front().getDebugLoc();
289
290 const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
291 if (LeafFuncOptimization)
292 addPrologLeaf(Fn, TII, MBB, DL, LeafFuncRegister);
293 else
294 addProlog(Fn, TII, MBB, DL);
295
296 MachineBasicBlock *Trap = nullptr;
297 for (auto &MBB : Fn) {
298 if (MBB.empty())
299 continue;
300
301 MachineInstr &MI = MBB.instr_back();
302 if (MI.isReturn()) {
303 if (!Trap) {
304 Trap = Fn.CreateMachineBasicBlock();
305 BuildMI(Trap, MI.getDebugLoc(), TII->get(X86::TRAP));
306 Fn.push_back(Trap);
307 }
308
309 if (LeafFuncOptimization)
310 addEpilogLeaf(TII, MBB, MI, *Trap, LeafFuncRegister);
311 else if (MI.findRegisterUseOperand(X86::R11))
312 addEpilogOnlyR10(TII, MBB, MI, *Trap);
313 else
314 addEpilog(TII, MBB, MI, *Trap);
315 }
316 }
317
318 return true;
319 }
320
321 INITIALIZE_PASS(ShadowCallStack, "shadow-call-stack", "Shadow Call Stack",
322 false, false)
323
createShadowCallStackPass()324 FunctionPass *llvm::createShadowCallStackPass() {
325 return new ShadowCallStack();
326 }
327