1 //===- subzero/src/IceASanInstrumentation.cpp - ASan ------------*- C++ -*-===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the AddressSanitizer instrumentation class.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "IceASanInstrumentation.h"
16
17 #include "IceBuildDefs.h"
18 #include "IceCfg.h"
19 #include "IceCfgNode.h"
20 #include "IceGlobalInits.h"
21 #include "IceInst.h"
22 #include "IceTargetLowering.h"
23 #include "IceTypes.h"
24
25 #include <sstream>
26 #include <unordered_map>
27 #include <unordered_set>
28 #include <vector>
29
30 namespace Ice {
31
32 namespace {
33
34 constexpr SizeT BytesPerWord = sizeof(uint32_t);
35 constexpr SizeT RzSize = 32;
36 constexpr SizeT ShadowScaleLog2 = 3;
37 constexpr SizeT ShadowScale = 1 << ShadowScaleLog2;
38 constexpr SizeT ShadowLength32 = 1 << (32 - ShadowScaleLog2);
39 constexpr int32_t StackPoisonVal = -1;
40 constexpr const char *ASanPrefix = "__asan";
41 constexpr const char *RzPrefix = "__$rz";
42 constexpr const char *RzArrayName = "__$rz_array";
43 constexpr const char *RzSizesName = "__$rz_sizes";
44 const llvm::NaClBitcodeRecord::RecordVector RzContents =
45 llvm::NaClBitcodeRecord::RecordVector(RzSize, 'R');
46
// In order to instrument the code correctly, the .pexe must not have had its
// symbols stripped: both tables below are keyed by symbol name.
using StringMap = std::unordered_map<std::string, std::string>;
using StringSet = std::unordered_set<std::string>;

// Maps the name of an allocation function to the name of the function that
// replaces it in calls, loads/stores of its address and global initializers.
// TODO(tlively): Handle all allocation functions
const StringMap FuncSubstitutions = {
    {"malloc", "__asan_malloc"},
    {"free", "__asan_free"},
    {"calloc", "__asan_calloc"},
    {"__asan_dummy_calloc", "__asan_calloc"},
    {"realloc", "__asan_realloc"},
};

// Names of functions that must not be instrumented.
const StringSet FuncBlackList = {"_Balloc"};
58
sizeToByteVec(SizeT Size)59 llvm::NaClBitcodeRecord::RecordVector sizeToByteVec(SizeT Size) {
60 llvm::NaClBitcodeRecord::RecordVector SizeContents;
61 for (unsigned i = 0; i < sizeof(Size); ++i) {
62 SizeContents.emplace_back(Size % (1 << CHAR_BIT));
63 Size >>= CHAR_BIT;
64 }
65 return SizeContents;
66 }
67
68 } // end of anonymous namespace
69
70 ICE_TLS_DEFINE_FIELD(VarSizeMap *, ASanInstrumentation, LocalVars);
71 ICE_TLS_DEFINE_FIELD(std::vector<InstStore *> *, ASanInstrumentation,
72 LocalDtors);
73 ICE_TLS_DEFINE_FIELD(CfgNode *, ASanInstrumentation, CurNode);
74 ICE_TLS_DEFINE_FIELD(VarSizeMap *, ASanInstrumentation, CheckedVars);
75
isInstrumentable(Cfg * Func)76 bool ASanInstrumentation::isInstrumentable(Cfg *Func) {
77 std::string FuncName = Func->getFunctionName().toStringOrEmpty();
78 return FuncName == "" ||
79 (FuncBlackList.count(FuncName) == 0 && FuncName.find(ASanPrefix) != 0);
80 }
81
82 // Create redzones around all global variables, ensuring that the initializer
83 // types of the redzones and their associated globals match so that they are
84 // laid out together in memory.
instrumentGlobals(VariableDeclarationList & Globals)85 void ASanInstrumentation::instrumentGlobals(VariableDeclarationList &Globals) {
86 std::unique_lock<std::mutex> _(GlobalsMutex);
87 if (DidProcessGlobals)
88 return;
89 VariableDeclarationList NewGlobals;
90 // Global holding pointers to all redzones
91 auto *RzArray = VariableDeclaration::create(&NewGlobals);
92 // Global holding sizes of all redzones
93 auto *RzSizes = VariableDeclaration::create(&NewGlobals);
94
95 RzArray->setName(Ctx, RzArrayName);
96 RzSizes->setName(Ctx, RzSizesName);
97 RzArray->setIsConstant(true);
98 RzSizes->setIsConstant(true);
99 NewGlobals.push_back(RzArray);
100 NewGlobals.push_back(RzSizes);
101
102 using PrototypeMap = std::unordered_map<std::string, FunctionDeclaration *>;
103 PrototypeMap ProtoSubstitutions;
104 for (VariableDeclaration *Global : Globals) {
105 assert(Global->getAlignment() <= RzSize);
106 VariableDeclaration *RzLeft = VariableDeclaration::create(&NewGlobals);
107 VariableDeclaration *NewGlobal = Global;
108 VariableDeclaration *RzRight = VariableDeclaration::create(&NewGlobals);
109 RzLeft->setName(Ctx, nextRzName());
110 RzRight->setName(Ctx, nextRzName());
111 SizeT Alignment = std::max(RzSize, Global->getAlignment());
112 SizeT RzLeftSize = Alignment;
113 SizeT RzRightSize =
114 RzSize + Utils::OffsetToAlignment(Global->getNumBytes(), Alignment);
115 if (!Global->hasNonzeroInitializer()) {
116 RzLeft->addInitializer(VariableDeclaration::ZeroInitializer::create(
117 &NewGlobals, RzLeftSize));
118 RzRight->addInitializer(VariableDeclaration::ZeroInitializer::create(
119 &NewGlobals, RzRightSize));
120 } else {
121 RzLeft->addInitializer(VariableDeclaration::DataInitializer::create(
122 &NewGlobals, llvm::NaClBitcodeRecord::RecordVector(RzLeftSize, 'R')));
123 RzRight->addInitializer(VariableDeclaration::DataInitializer::create(
124 &NewGlobals,
125 llvm::NaClBitcodeRecord::RecordVector(RzRightSize, 'R')));
126
127 // replace any pointers to allocator functions
128 NewGlobal = VariableDeclaration::create(&NewGlobals);
129 NewGlobal->setName(Global->getName());
130 std::vector<VariableDeclaration::Initializer *> GlobalInits =
131 Global->getInitializers();
132 for (VariableDeclaration::Initializer *Init : GlobalInits) {
133 auto *RelocInit =
134 llvm::dyn_cast<VariableDeclaration::RelocInitializer>(Init);
135 if (RelocInit == nullptr) {
136 NewGlobal->addInitializer(Init);
137 continue;
138 }
139 const GlobalDeclaration *TargetDecl = RelocInit->getDeclaration();
140 const auto *TargetFunc =
141 llvm::dyn_cast<FunctionDeclaration>(TargetDecl);
142 if (TargetFunc == nullptr) {
143 NewGlobal->addInitializer(Init);
144 continue;
145 }
146 std::string TargetName = TargetDecl->getName().toStringOrEmpty();
147 StringMap::const_iterator Subst = FuncSubstitutions.find(TargetName);
148 if (Subst == FuncSubstitutions.end()) {
149 NewGlobal->addInitializer(Init);
150 continue;
151 }
152 std::string SubstName = Subst->second;
153 PrototypeMap::iterator SubstProtoEntry =
154 ProtoSubstitutions.find(SubstName);
155 FunctionDeclaration *SubstProto;
156 if (SubstProtoEntry != ProtoSubstitutions.end())
157 SubstProto = SubstProtoEntry->second;
158 else {
159 constexpr bool IsProto = true;
160 SubstProto = FunctionDeclaration::create(
161 Ctx, TargetFunc->getSignature(), TargetFunc->getCallingConv(),
162 llvm::GlobalValue::ExternalLinkage, IsProto);
163 SubstProto->setName(Ctx, SubstName);
164 ProtoSubstitutions.insert({SubstName, SubstProto});
165 }
166
167 NewGlobal->addInitializer(VariableDeclaration::RelocInitializer::create(
168 &NewGlobals, SubstProto, RelocOffsetArray(0)));
169 }
170 }
171
172 RzLeft->setIsConstant(Global->getIsConstant());
173 NewGlobal->setIsConstant(Global->getIsConstant());
174 RzRight->setIsConstant(Global->getIsConstant());
175 RzLeft->setAlignment(Alignment);
176 NewGlobal->setAlignment(Alignment);
177 RzRight->setAlignment(1);
178 RzArray->addInitializer(VariableDeclaration::RelocInitializer::create(
179 &NewGlobals, RzLeft, RelocOffsetArray(0)));
180 RzArray->addInitializer(VariableDeclaration::RelocInitializer::create(
181 &NewGlobals, RzRight, RelocOffsetArray(0)));
182 RzSizes->addInitializer(VariableDeclaration::DataInitializer::create(
183 &NewGlobals, sizeToByteVec(RzLeftSize)));
184 RzSizes->addInitializer(VariableDeclaration::DataInitializer::create(
185 &NewGlobals, sizeToByteVec(RzRightSize)));
186
187 NewGlobals.push_back(RzLeft);
188 NewGlobals.push_back(NewGlobal);
189 NewGlobals.push_back(RzRight);
190 RzGlobalsNum += 2;
191
192 GlobalSizes.insert({NewGlobal->getName(), NewGlobal->getNumBytes()});
193 }
194
195 // Replace old list of globals, without messing up arena allocators
196 Globals.clear();
197 Globals.merge(&NewGlobals);
198 DidProcessGlobals = true;
199
200 // Log the new set of globals
201 if (BuildDefs::dump() && (getFlags().getVerbose() & IceV_GlobalInit)) {
202 OstreamLocker _(Ctx);
203 Ctx->getStrDump() << "========= Instrumented Globals =========\n";
204 for (VariableDeclaration *Global : Globals) {
205 Global->dump(Ctx->getStrDump());
206 }
207 }
208 }
209
nextRzName()210 std::string ASanInstrumentation::nextRzName() {
211 std::stringstream Name;
212 Name << RzPrefix << RzNum++;
213 return Name.str();
214 }
215
216 // Check for an alloca signaling the presence of local variables and add a
217 // redzone if it is found
instrumentFuncStart(LoweringContext & Context)218 void ASanInstrumentation::instrumentFuncStart(LoweringContext &Context) {
219 if (ICE_TLS_GET_FIELD(LocalDtors) == nullptr) {
220 ICE_TLS_SET_FIELD(LocalDtors, new std::vector<InstStore *>());
221 ICE_TLS_SET_FIELD(LocalVars, new VarSizeMap());
222 }
223 Cfg *Func = Context.getNode()->getCfg();
224 using Entry = std::pair<SizeT, int32_t>;
225 std::vector<InstAlloca *> NewAllocas;
226 std::vector<Entry> PoisonVals;
227 Variable *FirstShadowLocVar;
228 InstArithmetic *ShadowIndexCalc;
229 InstArithmetic *ShadowLocCalc;
230 InstAlloca *Cur;
231 ConstantInteger32 *VarSizeOp;
232 while (!Context.atEnd()) {
233 Cur = llvm::dyn_cast<InstAlloca>(iteratorToInst(Context.getCur()));
234 VarSizeOp = (Cur == nullptr)
235 ? nullptr
236 : llvm::dyn_cast<ConstantInteger32>(Cur->getSizeInBytes());
237 if (Cur == nullptr || VarSizeOp == nullptr) {
238 Context.advanceCur();
239 Context.advanceNext();
240 continue;
241 }
242
243 Cur->setDeleted();
244
245 if (PoisonVals.empty()) {
246 // insert leftmost redzone
247 auto *LastRzVar = Func->makeVariable(IceType_i32);
248 LastRzVar->setName(Func, nextRzName());
249 auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, RzSize);
250 constexpr SizeT Alignment = 8;
251 NewAllocas.emplace_back(
252 InstAlloca::create(Func, LastRzVar, ByteCount, Alignment));
253 PoisonVals.emplace_back(Entry{RzSize >> ShadowScaleLog2, StackPoisonVal});
254
255 // Calculate starting address for poisoning
256 FirstShadowLocVar = Func->makeVariable(IceType_i32);
257 FirstShadowLocVar->setName(Func, "firstShadowLoc");
258 auto *ShadowIndexVar = Func->makeVariable(IceType_i32);
259 ShadowIndexVar->setName(Func, "shadowIndex");
260
261 auto *ShadowScaleLog2Const =
262 ConstantInteger32::create(Ctx, IceType_i32, ShadowScaleLog2);
263 auto *ShadowMemLocConst =
264 ConstantInteger32::create(Ctx, IceType_i32, ShadowLength32);
265
266 ShadowIndexCalc =
267 InstArithmetic::create(Func, InstArithmetic::Lshr, ShadowIndexVar,
268 LastRzVar, ShadowScaleLog2Const);
269 ShadowLocCalc =
270 InstArithmetic::create(Func, InstArithmetic::Add, FirstShadowLocVar,
271 ShadowIndexVar, ShadowMemLocConst);
272 }
273
274 // create the new alloca that includes a redzone
275 SizeT VarSize = VarSizeOp->getValue();
276 Variable *Dest = Cur->getDest();
277 ICE_TLS_GET_FIELD(LocalVars)->insert({Dest, VarSize});
278 SizeT RzPadding = RzSize + Utils::OffsetToAlignment(VarSize, RzSize);
279 auto *ByteCount =
280 ConstantInteger32::create(Ctx, IceType_i32, VarSize + RzPadding);
281 constexpr SizeT Alignment = 8;
282 NewAllocas.emplace_back(
283 InstAlloca::create(Func, Dest, ByteCount, Alignment));
284
285 const SizeT Zeros = VarSize >> ShadowScaleLog2;
286 const SizeT Offset = VarSize % ShadowScale;
287 const SizeT PoisonBytes =
288 ((VarSize + RzPadding) >> ShadowScaleLog2) - Zeros - 1;
289 if (Zeros > 0)
290 PoisonVals.emplace_back(Entry{Zeros, 0});
291 PoisonVals.emplace_back(Entry{1, (Offset == 0) ? StackPoisonVal : Offset});
292 PoisonVals.emplace_back(Entry{PoisonBytes, StackPoisonVal});
293 Context.advanceCur();
294 Context.advanceNext();
295 }
296
297 Context.rewind();
298 if (PoisonVals.empty()) {
299 Context.advanceNext();
300 return;
301 }
302 for (InstAlloca *RzAlloca : NewAllocas) {
303 Context.insert(RzAlloca);
304 }
305 Context.insert(ShadowIndexCalc);
306 Context.insert(ShadowLocCalc);
307
308 // Poison redzones
309 std::vector<Entry>::iterator Iter = PoisonVals.begin();
310 for (SizeT Offset = 0; Iter != PoisonVals.end(); Offset += BytesPerWord) {
311 int32_t CurVals[BytesPerWord] = {0};
312 for (uint32_t i = 0; i < BytesPerWord; ++i) {
313 if (Iter == PoisonVals.end())
314 break;
315 Entry Val = *Iter;
316 CurVals[i] = Val.second;
317 --Val.first;
318 if (Val.first > 0)
319 *Iter = Val;
320 else
321 ++Iter;
322 }
323 int32_t Poison = ((CurVals[3] & 0xff) << 24) | ((CurVals[2] & 0xff) << 16) |
324 ((CurVals[1] & 0xff) << 8) | (CurVals[0] & 0xff);
325 if (Poison == 0)
326 continue;
327 auto *PoisonConst = ConstantInteger32::create(Ctx, IceType_i32, Poison);
328 auto *ZeroConst = ConstantInteger32::create(Ctx, IceType_i32, 0);
329 auto *OffsetConst = ConstantInteger32::create(Ctx, IceType_i32, Offset);
330 auto *PoisonAddrVar = Func->makeVariable(IceType_i32);
331 Context.insert(InstArithmetic::create(Func, InstArithmetic::Add,
332 PoisonAddrVar, FirstShadowLocVar,
333 OffsetConst));
334 Context.insert(InstStore::create(Func, PoisonConst, PoisonAddrVar));
335 ICE_TLS_GET_FIELD(LocalDtors)
336 ->emplace_back(InstStore::create(Func, ZeroConst, PoisonAddrVar));
337 }
338 Context.advanceNext();
339 }
340
instrumentCall(LoweringContext & Context,InstCall * Instr)341 void ASanInstrumentation::instrumentCall(LoweringContext &Context,
342 InstCall *Instr) {
343 auto *CallTarget =
344 llvm::dyn_cast<ConstantRelocatable>(Instr->getCallTarget());
345 if (CallTarget == nullptr)
346 return;
347
348 std::string TargetName = CallTarget->getName().toStringOrEmpty();
349 auto Subst = FuncSubstitutions.find(TargetName);
350 if (Subst == FuncSubstitutions.end())
351 return;
352
353 std::string SubName = Subst->second;
354 Constant *NewFunc = Ctx->getConstantExternSym(Ctx->getGlobalString(SubName));
355 auto *NewCall =
356 InstCall::create(Context.getNode()->getCfg(), Instr->getNumArgs(),
357 Instr->getDest(), NewFunc, Instr->isTailcall());
358 for (SizeT I = 0, Args = Instr->getNumArgs(); I < Args; ++I)
359 NewCall->addArg(Instr->getArg(I));
360 Context.insert(NewCall);
361 Instr->setDeleted();
362 }
363
instrumentLoad(LoweringContext & Context,InstLoad * Instr)364 void ASanInstrumentation::instrumentLoad(LoweringContext &Context,
365 InstLoad *Instr) {
366 Operand *Src = Instr->getSourceAddress();
367 if (auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
368 auto *NewLoad = InstLoad::create(Context.getNode()->getCfg(),
369 Instr->getDest(), instrumentReloc(Reloc));
370 Instr->setDeleted();
371 Context.insert(NewLoad);
372 Instr = NewLoad;
373 }
374 Constant *Func =
375 Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_check_load"));
376 instrumentAccess(Context, Instr->getSourceAddress(),
377 typeWidthInBytes(Instr->getDest()->getType()), Func);
378 }
379
instrumentStore(LoweringContext & Context,InstStore * Instr)380 void ASanInstrumentation::instrumentStore(LoweringContext &Context,
381 InstStore *Instr) {
382 Operand *Data = Instr->getData();
383 if (auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Data)) {
384 auto *NewStore = InstStore::create(
385 Context.getNode()->getCfg(), instrumentReloc(Reloc), Instr->getAddr());
386 Instr->setDeleted();
387 Context.insert(NewStore);
388 Instr = NewStore;
389 }
390 Constant *Func =
391 Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_check_store"));
392 instrumentAccess(Context, Instr->getAddr(),
393 typeWidthInBytes(Instr->getData()->getType()), Func);
394 }
395
396 ConstantRelocatable *
instrumentReloc(ConstantRelocatable * Reloc)397 ASanInstrumentation::instrumentReloc(ConstantRelocatable *Reloc) {
398 std::string DataName = Reloc->getName().toString();
399 StringMap::const_iterator DataSub = FuncSubstitutions.find(DataName);
400 if (DataSub != FuncSubstitutions.end()) {
401 return ConstantRelocatable::create(
402 Ctx, Reloc->getType(),
403 RelocatableTuple(Reloc->getOffset(), RelocOffsetArray(0),
404 Ctx->getGlobalString(DataSub->second),
405 Reloc->getEmitString()));
406 }
407 return Reloc;
408 }
409
instrumentAccess(LoweringContext & Context,Operand * Op,SizeT Size,Constant * CheckFunc)410 void ASanInstrumentation::instrumentAccess(LoweringContext &Context,
411 Operand *Op, SizeT Size,
412 Constant *CheckFunc) {
413 // Skip redundant checks within basic blocks
414 VarSizeMap *Checked = ICE_TLS_GET_FIELD(CheckedVars);
415 if (ICE_TLS_GET_FIELD(CurNode) != Context.getNode()) {
416 ICE_TLS_SET_FIELD(CurNode, Context.getNode());
417 if (Checked == NULL) {
418 Checked = new VarSizeMap();
419 ICE_TLS_SET_FIELD(CheckedVars, Checked);
420 }
421 Checked->clear();
422 }
423 VarSizeMap::iterator PrevCheck = Checked->find(Op);
424 if (PrevCheck != Checked->end() && PrevCheck->second >= Size)
425 return;
426 else
427 Checked->insert({Op, Size});
428
429 // check for known good local access
430 VarSizeMap::iterator LocalSize = ICE_TLS_GET_FIELD(LocalVars)->find(Op);
431 if (LocalSize != ICE_TLS_GET_FIELD(LocalVars)->end() &&
432 LocalSize->second >= Size)
433 return;
434 if (isOkGlobalAccess(Op, Size))
435 return;
436 constexpr SizeT NumArgs = 2;
437 constexpr Variable *Void = nullptr;
438 constexpr bool NoTailCall = false;
439 auto *Call = InstCall::create(Context.getNode()->getCfg(), NumArgs, Void,
440 CheckFunc, NoTailCall);
441 Call->addArg(Op);
442 Call->addArg(ConstantInteger32::create(Ctx, IceType_i32, Size));
443 // play games to insert the call before the access instruction
444 InstList::iterator Next = Context.getNext();
445 Context.setInsertPoint(Context.getCur());
446 Context.insert(Call);
447 Context.setNext(Next);
448 }
449
450 // TODO(tlively): Trace back load and store addresses to find their real offsets
isOkGlobalAccess(Operand * Op,SizeT Size)451 bool ASanInstrumentation::isOkGlobalAccess(Operand *Op, SizeT Size) {
452 auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Op);
453 if (Reloc == nullptr)
454 return false;
455 RelocOffsetT Offset = Reloc->getOffset();
456 GlobalSizeMap::iterator GlobalSize = GlobalSizes.find(Reloc->getName());
457 return GlobalSize != GlobalSizes.end() && GlobalSize->second - Offset >= Size;
458 }
459
instrumentRet(LoweringContext & Context,InstRet *)460 void ASanInstrumentation::instrumentRet(LoweringContext &Context, InstRet *) {
461 Cfg *Func = Context.getNode()->getCfg();
462 Context.setInsertPoint(Context.getCur());
463 for (InstStore *RzUnpoison : *ICE_TLS_GET_FIELD(LocalDtors)) {
464 Context.insert(
465 InstStore::create(Func, RzUnpoison->getData(), RzUnpoison->getAddr()));
466 }
467 Context.advanceCur();
468 Context.advanceNext();
469 }
470
instrumentStart(Cfg * Func)471 void ASanInstrumentation::instrumentStart(Cfg *Func) {
472 Constant *ShadowMemInit =
473 Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_init"));
474 constexpr SizeT NumArgs = 3;
475 constexpr Variable *Void = nullptr;
476 constexpr bool NoTailCall = false;
477 auto *Call = InstCall::create(Func, NumArgs, Void, ShadowMemInit, NoTailCall);
478 Func->getEntryNode()->getInsts().push_front(Call);
479
480 instrumentGlobals(*getGlobals());
481
482 Call->addArg(ConstantInteger32::create(Ctx, IceType_i32, RzGlobalsNum));
483 Call->addArg(Ctx->getConstantSym(0, Ctx->getGlobalString(RzArrayName)));
484 Call->addArg(Ctx->getConstantSym(0, Ctx->getGlobalString(RzSizesName)));
485 }
486
487 // TODO(tlively): make this more efficient with swap idiom
finishFunc(Cfg *)488 void ASanInstrumentation::finishFunc(Cfg *) {
489 ICE_TLS_GET_FIELD(LocalVars)->clear();
490 ICE_TLS_GET_FIELD(LocalDtors)->clear();
491 }
492
493 } // end of namespace Ice
494