1 //===- subzero/src/IceASanInstrumentation.cpp - ASan ------------*- C++ -*-===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the AddressSanitizer instrumentation class.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "IceASanInstrumentation.h"
16
17 #include "IceBuildDefs.h"
18 #include "IceCfg.h"
19 #include "IceCfgNode.h"
20 #include "IceGlobalInits.h"
21 #include "IceInst.h"
22 #include "IceTargetLowering.h"
23 #include "IceTypes.h"
24
25 #include <sstream>
26 #include <unordered_map>
27 #include <unordered_set>
28 #include <vector>
29
30 namespace Ice {
31
32 namespace {
33
34 constexpr SizeT BytesPerWord = sizeof(uint32_t);
35 constexpr SizeT RzSize = 32;
36 constexpr SizeT ShadowScaleLog2 = 3;
37 constexpr SizeT ShadowScale = 1 << ShadowScaleLog2;
38 constexpr SizeT ShadowLength32 = 1 << (32 - ShadowScaleLog2);
39 constexpr int32_t StackPoisonVal = -1;
40 constexpr const char *ASanPrefix = "__asan";
41 constexpr const char *RzPrefix = "__$rz";
42 constexpr const char *RzArrayName = "__$rz_array";
43 constexpr const char *RzSizesName = "__$rz_sizes";
44 const llvm::NaClBitcodeRecord::RecordVector RzContents =
45 llvm::NaClBitcodeRecord::RecordVector(RzSize, 'R');
46
// In order to instrument the code correctly, the .pexe must not have had its
// symbols stripped.
using StringMap = std::unordered_map<std::string, std::string>;
using StringSet = std::unordered_set<std::string>;

// Maps the name of an allocation function to the name of the function that is
// substituted for it in calls, relocations and global initializers.
// TODO(tlively): Handle all allocation functions
const StringMap FuncSubstitutions = {
    {"malloc", "__asan_malloc"},
    {"calloc", "__asan_calloc"},
    {"__asan_dummy_calloc", "__asan_calloc"},
    {"realloc", "__asan_realloc"},
    {"free", "__asan_free"},
};

// Names of functions that isInstrumentable() rejects.
const StringSet FuncIgnoreList = {"_Balloc"};
58
sizeToByteVec(SizeT Size)59 llvm::NaClBitcodeRecord::RecordVector sizeToByteVec(SizeT Size) {
60 llvm::NaClBitcodeRecord::RecordVector SizeContents;
61 for (unsigned i = 0; i < sizeof(Size); ++i) {
62 SizeContents.emplace_back(Size % (1 << CHAR_BIT));
63 Size >>= CHAR_BIT;
64 }
65 return SizeContents;
66 }
67
68 } // end of anonymous namespace
69
70 ICE_TLS_DEFINE_FIELD(VarSizeMap *, ASanInstrumentation, LocalVars);
71 ICE_TLS_DEFINE_FIELD(std::vector<InstStore *> *, ASanInstrumentation,
72 LocalDtors);
73 ICE_TLS_DEFINE_FIELD(CfgNode *, ASanInstrumentation, CurNode);
74 ICE_TLS_DEFINE_FIELD(VarSizeMap *, ASanInstrumentation, CheckedVars);
75
isInstrumentable(Cfg * Func)76 bool ASanInstrumentation::isInstrumentable(Cfg *Func) {
77 std::string FuncName = Func->getFunctionName().toStringOrEmpty();
78 return FuncName == "" || (FuncIgnoreList.count(FuncName) == 0 &&
79 FuncName.find(ASanPrefix) != 0);
80 }
81
82 // Create redzones around all global variables, ensuring that the initializer
83 // types of the redzones and their associated globals match so that they are
84 // laid out together in memory.
instrumentGlobals(VariableDeclarationList & Globals)85 void ASanInstrumentation::instrumentGlobals(VariableDeclarationList &Globals) {
86 std::unique_lock<std::mutex> _(GlobalsMutex);
87 if (DidProcessGlobals)
88 return;
89 VariableDeclarationList NewGlobals;
90 // Global holding pointers to all redzones
91 auto *RzArray = VariableDeclaration::create(&NewGlobals);
92 // Global holding sizes of all redzones
93 auto *RzSizes = VariableDeclaration::create(&NewGlobals);
94
95 RzArray->setName(Ctx, RzArrayName);
96 RzSizes->setName(Ctx, RzSizesName);
97 RzArray->setIsConstant(true);
98 RzSizes->setIsConstant(true);
99 NewGlobals.push_back(RzArray);
100 NewGlobals.push_back(RzSizes);
101
102 using PrototypeMap = std::unordered_map<std::string, FunctionDeclaration *>;
103 PrototypeMap ProtoSubstitutions;
104 for (VariableDeclaration *Global : Globals) {
105 assert(Global->getAlignment() <= RzSize);
106 VariableDeclaration *RzLeft = VariableDeclaration::create(&NewGlobals);
107 VariableDeclaration *NewGlobal = Global;
108 VariableDeclaration *RzRight = VariableDeclaration::create(&NewGlobals);
109 RzLeft->setName(Ctx, nextRzName());
110 RzRight->setName(Ctx, nextRzName());
111 SizeT Alignment = std::max(RzSize, Global->getAlignment());
112 SizeT RzLeftSize = Alignment;
113 SizeT RzRightSize =
114 RzSize + Utils::OffsetToAlignment(Global->getNumBytes(), Alignment);
115 if (!Global->hasNonzeroInitializer()) {
116 RzLeft->addInitializer(VariableDeclaration::ZeroInitializer::create(
117 &NewGlobals, RzLeftSize));
118 RzRight->addInitializer(VariableDeclaration::ZeroInitializer::create(
119 &NewGlobals, RzRightSize));
120 } else {
121 RzLeft->addInitializer(VariableDeclaration::DataInitializer::create(
122 &NewGlobals, llvm::NaClBitcodeRecord::RecordVector(RzLeftSize, 'R')));
123 RzRight->addInitializer(VariableDeclaration::DataInitializer::create(
124 &NewGlobals,
125 llvm::NaClBitcodeRecord::RecordVector(RzRightSize, 'R')));
126
127 // replace any pointers to allocator functions
128 NewGlobal = VariableDeclaration::create(&NewGlobals);
129 NewGlobal->setName(Global->getName());
130 std::vector<VariableDeclaration::Initializer *> GlobalInits =
131 Global->getInitializers();
132 for (VariableDeclaration::Initializer *Init : GlobalInits) {
133 auto *RelocInit =
134 llvm::dyn_cast<VariableDeclaration::RelocInitializer>(Init);
135 if (RelocInit == nullptr) {
136 NewGlobal->addInitializer(Init);
137 continue;
138 }
139 const GlobalDeclaration *TargetDecl = RelocInit->getDeclaration();
140 const auto *TargetFunc =
141 llvm::dyn_cast<FunctionDeclaration>(TargetDecl);
142 if (TargetFunc == nullptr) {
143 NewGlobal->addInitializer(Init);
144 continue;
145 }
146 std::string TargetName = TargetDecl->getName().toStringOrEmpty();
147 StringMap::const_iterator Subst = FuncSubstitutions.find(TargetName);
148 if (Subst == FuncSubstitutions.end()) {
149 NewGlobal->addInitializer(Init);
150 continue;
151 }
152 std::string SubstName = Subst->second;
153 PrototypeMap::iterator SubstProtoEntry =
154 ProtoSubstitutions.find(SubstName);
155 FunctionDeclaration *SubstProto;
156 if (SubstProtoEntry != ProtoSubstitutions.end())
157 SubstProto = SubstProtoEntry->second;
158 else {
159 constexpr bool IsProto = true;
160 SubstProto = FunctionDeclaration::create(
161 Ctx, TargetFunc->getSignature(), TargetFunc->getCallingConv(),
162 llvm::GlobalValue::ExternalLinkage, IsProto);
163 SubstProto->setName(Ctx, SubstName);
164 ProtoSubstitutions.insert({SubstName, SubstProto});
165 }
166
167 NewGlobal->addInitializer(VariableDeclaration::RelocInitializer::create(
168 &NewGlobals, SubstProto, RelocOffsetArray(0)));
169 }
170 }
171
172 RzLeft->setIsConstant(Global->getIsConstant());
173 NewGlobal->setIsConstant(Global->getIsConstant());
174 RzRight->setIsConstant(Global->getIsConstant());
175 RzLeft->setAlignment(Alignment);
176 NewGlobal->setAlignment(Alignment);
177 RzRight->setAlignment(1);
178 RzArray->addInitializer(VariableDeclaration::RelocInitializer::create(
179 &NewGlobals, RzLeft, RelocOffsetArray(0)));
180 RzArray->addInitializer(VariableDeclaration::RelocInitializer::create(
181 &NewGlobals, RzRight, RelocOffsetArray(0)));
182 RzSizes->addInitializer(VariableDeclaration::DataInitializer::create(
183 &NewGlobals, sizeToByteVec(RzLeftSize)));
184 RzSizes->addInitializer(VariableDeclaration::DataInitializer::create(
185 &NewGlobals, sizeToByteVec(RzRightSize)));
186
187 NewGlobals.push_back(RzLeft);
188 NewGlobals.push_back(NewGlobal);
189 NewGlobals.push_back(RzRight);
190 RzGlobalsNum += 2;
191
192 GlobalSizes.insert({NewGlobal->getName(), NewGlobal->getNumBytes()});
193 }
194
195 // Replace old list of globals, without messing up arena allocators
196 Globals.clear();
197 Globals.merge(&NewGlobals);
198 DidProcessGlobals = true;
199
200 // Log the new set of globals
201 if (BuildDefs::dump() && (getFlags().getVerbose() & IceV_GlobalInit)) {
202 OstreamLocker _(Ctx);
203 Ctx->getStrDump() << "========= Instrumented Globals =========\n";
204 for (VariableDeclaration *Global : Globals) {
205 Global->dump(Ctx->getStrDump());
206 }
207 }
208 }
209
nextRzName()210 std::string ASanInstrumentation::nextRzName() {
211 std::stringstream Name;
212 Name << RzPrefix << RzNum++;
213 return Name.str();
214 }
215
216 // Check for an alloca signaling the presence of local variables and add a
217 // redzone if it is found
instrumentFuncStart(LoweringContext & Context)218 void ASanInstrumentation::instrumentFuncStart(LoweringContext &Context) {
219 if (ICE_TLS_GET_FIELD(LocalDtors) == nullptr) {
220 ICE_TLS_SET_FIELD(LocalDtors, new std::vector<InstStore *>());
221 ICE_TLS_SET_FIELD(LocalVars, new VarSizeMap());
222 }
223 Cfg *Func = Context.getNode()->getCfg();
224 using Entry = std::pair<SizeT, int32_t>;
225 std::vector<InstAlloca *> NewAllocas;
226 std::vector<Entry> PoisonVals;
227 Variable *FirstShadowLocVar;
228 InstArithmetic *ShadowIndexCalc;
229 InstArithmetic *ShadowLocCalc;
230 InstAlloca *Cur;
231 ConstantInteger32 *VarSizeOp;
232 while (!Context.atEnd()) {
233 Cur = llvm::dyn_cast<InstAlloca>(iteratorToInst(Context.getCur()));
234 VarSizeOp = (Cur == nullptr)
235 ? nullptr
236 : llvm::dyn_cast<ConstantInteger32>(Cur->getSizeInBytes());
237 if (Cur == nullptr || VarSizeOp == nullptr) {
238 Context.advanceCur();
239 Context.advanceNext();
240 continue;
241 }
242
243 Cur->setDeleted();
244
245 if (PoisonVals.empty()) {
246 // insert leftmost redzone
247 auto *LastRzVar = Func->makeVariable(IceType_i32);
248 LastRzVar->setName(Func, nextRzName());
249 auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, RzSize);
250 constexpr SizeT Alignment = 8;
251 NewAllocas.emplace_back(
252 InstAlloca::create(Func, LastRzVar, ByteCount, Alignment));
253 PoisonVals.emplace_back(Entry{RzSize >> ShadowScaleLog2, StackPoisonVal});
254
255 // Calculate starting address for poisoning
256 FirstShadowLocVar = Func->makeVariable(IceType_i32);
257 FirstShadowLocVar->setName(Func, "firstShadowLoc");
258 auto *ShadowIndexVar = Func->makeVariable(IceType_i32);
259 ShadowIndexVar->setName(Func, "shadowIndex");
260
261 auto *ShadowScaleLog2Const =
262 ConstantInteger32::create(Ctx, IceType_i32, ShadowScaleLog2);
263 auto *ShadowMemLocConst =
264 ConstantInteger32::create(Ctx, IceType_i32, ShadowLength32);
265
266 ShadowIndexCalc =
267 InstArithmetic::create(Func, InstArithmetic::Lshr, ShadowIndexVar,
268 LastRzVar, ShadowScaleLog2Const);
269 ShadowLocCalc =
270 InstArithmetic::create(Func, InstArithmetic::Add, FirstShadowLocVar,
271 ShadowIndexVar, ShadowMemLocConst);
272 }
273
274 // create the new alloca that includes a redzone
275 SizeT VarSize = VarSizeOp->getValue();
276 Variable *Dest = Cur->getDest();
277 ICE_TLS_GET_FIELD(LocalVars)->insert({Dest, VarSize});
278 SizeT RzPadding = RzSize + Utils::OffsetToAlignment(VarSize, RzSize);
279 auto *ByteCount =
280 ConstantInteger32::create(Ctx, IceType_i32, VarSize + RzPadding);
281 constexpr SizeT Alignment = 8;
282 NewAllocas.emplace_back(
283 InstAlloca::create(Func, Dest, ByteCount, Alignment));
284
285 const SizeT Zeros = VarSize >> ShadowScaleLog2;
286 const SizeT Offset = VarSize % ShadowScale;
287 const SizeT PoisonBytes =
288 ((VarSize + RzPadding) >> ShadowScaleLog2) - Zeros - 1;
289 if (Zeros > 0)
290 PoisonVals.emplace_back(Entry{Zeros, 0});
291 PoisonVals.emplace_back(Entry{1, (Offset == 0) ? StackPoisonVal : Offset});
292 PoisonVals.emplace_back(Entry{PoisonBytes, StackPoisonVal});
293 Context.advanceCur();
294 Context.advanceNext();
295 }
296
297 Context.rewind();
298 if (PoisonVals.empty()) {
299 Context.advanceNext();
300 return;
301 }
302 for (InstAlloca *RzAlloca : NewAllocas) {
303 Context.insert(RzAlloca);
304 }
305 Context.insert(ShadowIndexCalc);
306 Context.insert(ShadowLocCalc);
307
308 // Poison redzones
309 std::vector<Entry>::iterator Iter = PoisonVals.begin();
310 for (SizeT Offset = 0; Iter != PoisonVals.end(); Offset += BytesPerWord) {
311 int32_t CurVals[BytesPerWord] = {0};
312 for (uint32_t i = 0; i < BytesPerWord; ++i) {
313 if (Iter == PoisonVals.end())
314 break;
315 Entry Val = *Iter;
316 CurVals[i] = Val.second;
317 --Val.first;
318 if (Val.first > 0)
319 *Iter = Val;
320 else
321 ++Iter;
322 }
323 int32_t Poison = ((CurVals[3] & 0xff) << 24) | ((CurVals[2] & 0xff) << 16) |
324 ((CurVals[1] & 0xff) << 8) | (CurVals[0] & 0xff);
325 if (Poison == 0)
326 continue;
327 auto *PoisonConst = ConstantInteger32::create(Ctx, IceType_i32, Poison);
328 auto *ZeroConst = ConstantInteger32::create(Ctx, IceType_i32, 0);
329 auto *OffsetConst = ConstantInteger32::create(Ctx, IceType_i32, Offset);
330 auto *PoisonAddrVar = Func->makeVariable(IceType_i32);
331 Context.insert(InstArithmetic::create(Func, InstArithmetic::Add,
332 PoisonAddrVar, FirstShadowLocVar,
333 OffsetConst));
334 Context.insert(InstStore::create(Func, PoisonConst, PoisonAddrVar));
335 ICE_TLS_GET_FIELD(LocalDtors)
336 ->emplace_back(InstStore::create(Func, ZeroConst, PoisonAddrVar));
337 }
338 Context.advanceNext();
339 }
340
instrumentCall(LoweringContext & Context,InstCall * Instr)341 void ASanInstrumentation::instrumentCall(LoweringContext &Context,
342 InstCall *Instr) {
343 auto *CallTarget =
344 llvm::dyn_cast<ConstantRelocatable>(Instr->getCallTarget());
345 if (CallTarget == nullptr)
346 return;
347
348 std::string TargetName = CallTarget->getName().toStringOrEmpty();
349 auto Subst = FuncSubstitutions.find(TargetName);
350 if (Subst == FuncSubstitutions.end())
351 return;
352
353 std::string SubName = Subst->second;
354 Constant *NewFunc = Ctx->getConstantExternSym(Ctx->getGlobalString(SubName));
355 auto *NewCall =
356 InstCall::create(Context.getNode()->getCfg(), Instr->getNumArgs(),
357 Instr->getDest(), NewFunc, Instr->isTailcall());
358 for (SizeT I = 0, Args = Instr->getNumArgs(); I < Args; ++I)
359 NewCall->addArg(Instr->getArg(I));
360 Context.insert(NewCall);
361 Instr->setDeleted();
362 }
363
instrumentLoad(LoweringContext & Context,InstLoad * Instr)364 void ASanInstrumentation::instrumentLoad(LoweringContext &Context,
365 InstLoad *Instr) {
366 Operand *Src = Instr->getLoadAddress();
367 if (auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Src)) {
368 auto *NewLoad = InstLoad::create(Context.getNode()->getCfg(),
369 Instr->getDest(), instrumentReloc(Reloc));
370 Instr->setDeleted();
371 Context.insert(NewLoad);
372 Instr = NewLoad;
373 }
374 Constant *Func =
375 Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_check_load"));
376 instrumentAccess(Context, Instr->getLoadAddress(),
377 typeWidthInBytes(Instr->getDest()->getType()), Func);
378 }
379
instrumentStore(LoweringContext & Context,InstStore * Instr)380 void ASanInstrumentation::instrumentStore(LoweringContext &Context,
381 InstStore *Instr) {
382 Operand *Data = Instr->getData();
383 if (auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Data)) {
384 auto *NewStore =
385 InstStore::create(Context.getNode()->getCfg(), instrumentReloc(Reloc),
386 Instr->getStoreAddress());
387 Instr->setDeleted();
388 Context.insert(NewStore);
389 Instr = NewStore;
390 }
391 Constant *Func =
392 Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_check_store"));
393 instrumentAccess(Context, Instr->getStoreAddress(),
394 typeWidthInBytes(Instr->getData()->getType()), Func);
395 }
396
397 ConstantRelocatable *
instrumentReloc(ConstantRelocatable * Reloc)398 ASanInstrumentation::instrumentReloc(ConstantRelocatable *Reloc) {
399 std::string DataName = Reloc->getName().toString();
400 StringMap::const_iterator DataSub = FuncSubstitutions.find(DataName);
401 if (DataSub != FuncSubstitutions.end()) {
402 return ConstantRelocatable::create(
403 Ctx, Reloc->getType(),
404 RelocatableTuple(Reloc->getOffset(), RelocOffsetArray(0),
405 Ctx->getGlobalString(DataSub->second),
406 Reloc->getEmitString()));
407 }
408 return Reloc;
409 }
410
instrumentAccess(LoweringContext & Context,Operand * Op,SizeT Size,Constant * CheckFunc)411 void ASanInstrumentation::instrumentAccess(LoweringContext &Context,
412 Operand *Op, SizeT Size,
413 Constant *CheckFunc) {
414 // Skip redundant checks within basic blocks
415 VarSizeMap *Checked = ICE_TLS_GET_FIELD(CheckedVars);
416 if (ICE_TLS_GET_FIELD(CurNode) != Context.getNode()) {
417 ICE_TLS_SET_FIELD(CurNode, Context.getNode());
418 if (Checked == NULL) {
419 Checked = new VarSizeMap();
420 ICE_TLS_SET_FIELD(CheckedVars, Checked);
421 }
422 Checked->clear();
423 }
424 VarSizeMap::iterator PrevCheck = Checked->find(Op);
425 if (PrevCheck != Checked->end() && PrevCheck->second >= Size)
426 return;
427 else
428 Checked->insert({Op, Size});
429
430 // check for known good local access
431 VarSizeMap::iterator LocalSize = ICE_TLS_GET_FIELD(LocalVars)->find(Op);
432 if (LocalSize != ICE_TLS_GET_FIELD(LocalVars)->end() &&
433 LocalSize->second >= Size)
434 return;
435 if (isOkGlobalAccess(Op, Size))
436 return;
437 constexpr SizeT NumArgs = 2;
438 constexpr Variable *Void = nullptr;
439 constexpr bool NoTailCall = false;
440 auto *Call = InstCall::create(Context.getNode()->getCfg(), NumArgs, Void,
441 CheckFunc, NoTailCall);
442 Call->addArg(Op);
443 Call->addArg(ConstantInteger32::create(Ctx, IceType_i32, Size));
444 // play games to insert the call before the access instruction
445 InstList::iterator Next = Context.getNext();
446 Context.setInsertPoint(Context.getCur());
447 Context.insert(Call);
448 Context.setNext(Next);
449 }
450
451 // TODO(tlively): Trace back load and store addresses to find their real offsets
isOkGlobalAccess(Operand * Op,SizeT Size)452 bool ASanInstrumentation::isOkGlobalAccess(Operand *Op, SizeT Size) {
453 auto *Reloc = llvm::dyn_cast<ConstantRelocatable>(Op);
454 if (Reloc == nullptr)
455 return false;
456 RelocOffsetT Offset = Reloc->getOffset();
457 GlobalSizeMap::iterator GlobalSize = GlobalSizes.find(Reloc->getName());
458 return GlobalSize != GlobalSizes.end() && GlobalSize->second - Offset >= Size;
459 }
460
instrumentRet(LoweringContext & Context,InstRet *)461 void ASanInstrumentation::instrumentRet(LoweringContext &Context, InstRet *) {
462 Cfg *Func = Context.getNode()->getCfg();
463 Context.setInsertPoint(Context.getCur());
464 for (InstStore *RzUnpoison : *ICE_TLS_GET_FIELD(LocalDtors)) {
465 Context.insert(InstStore::create(Func, RzUnpoison->getData(),
466 RzUnpoison->getStoreAddress()));
467 }
468 Context.advanceCur();
469 Context.advanceNext();
470 }
471
instrumentStart(Cfg * Func)472 void ASanInstrumentation::instrumentStart(Cfg *Func) {
473 Constant *ShadowMemInit =
474 Ctx->getConstantExternSym(Ctx->getGlobalString("__asan_init"));
475 constexpr SizeT NumArgs = 3;
476 constexpr Variable *Void = nullptr;
477 constexpr bool NoTailCall = false;
478 auto *Call = InstCall::create(Func, NumArgs, Void, ShadowMemInit, NoTailCall);
479 Func->getEntryNode()->getInsts().push_front(Call);
480
481 instrumentGlobals(*getGlobals());
482
483 Call->addArg(ConstantInteger32::create(Ctx, IceType_i32, RzGlobalsNum));
484 Call->addArg(Ctx->getConstantSym(0, Ctx->getGlobalString(RzArrayName)));
485 Call->addArg(Ctx->getConstantSym(0, Ctx->getGlobalString(RzSizesName)));
486 }
487
488 // TODO(tlively): make this more efficient with swap idiom
finishFunc(Cfg *)489 void ASanInstrumentation::finishFunc(Cfg *) {
490 ICE_TLS_GET_FIELD(LocalVars)->clear();
491 ICE_TLS_GET_FIELD(LocalDtors)->clear();
492 }
493
494 } // end of namespace Ice
495