1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This checker defines the attack surface for generic taint propagation.
11 //
12 // The taint information produced by it might be useful to other checkers. For
13 // example, checkers should report errors which involve tainted data more
14 // aggressively, even if the involved symbols are under constrained.
15 //
16 //===----------------------------------------------------------------------===//
17 #include "ClangSACheckers.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/Basic/Builtins.h"
20 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21 #include "clang/StaticAnalyzer/Core/Checker.h"
22 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
25 #include <climits>
26
27 using namespace clang;
28 using namespace ento;
29
30 namespace {
31 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32 check::PreStmt<CallExpr> > {
33 public:
getTag()34 static void *getTag() { static int Tag; return &Tag; }
35
36 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37 void checkPostStmt(const DeclRefExpr *DRE, CheckerContext &C) const;
38
39 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
40
41 private:
42 static const unsigned InvalidArgIndex = UINT_MAX;
43 /// Denotes the return vale.
44 static const unsigned ReturnValueIndex = UINT_MAX - 1;
45
46 mutable OwningPtr<BugType> BT;
initBugType() const47 inline void initBugType() const {
48 if (!BT)
49 BT.reset(new BugType("Use of Untrusted Data", "Untrusted Data"));
50 }
51
52 /// \brief Catch taint related bugs. Check if tainted data is passed to a
53 /// system call etc.
54 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
55
56 /// \brief Add taint sources on a pre-visit.
57 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
58
59 /// \brief Propagate taint generated at pre-visit.
60 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
61
62 /// \brief Add taint sources on a post visit.
63 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
64
65 /// Check if the region the expression evaluates to is the standard input,
66 /// and thus, is tainted.
67 static bool isStdin(const Expr *E, CheckerContext &C);
68
69 /// \brief Given a pointer argument, get the symbol of the value it contains
70 /// (points to).
71 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
72
73 /// Functions defining the attack surface.
74 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
75 CheckerContext &C) const;
76 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
77 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
78 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
79
80 /// Taint the scanned input if the file is tainted.
81 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
82
83 /// Check for CWE-134: Uncontrolled Format String.
84 static const char MsgUncontrolledFormatString[];
85 bool checkUncontrolledFormatString(const CallExpr *CE,
86 CheckerContext &C) const;
87
88 /// Check for:
89 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
90 /// CWE-78, "Failure to Sanitize Data into an OS Command"
91 static const char MsgSanitizeSystemArgs[];
92 bool checkSystemCall(const CallExpr *CE, StringRef Name,
93 CheckerContext &C) const;
94
95 /// Check if tainted data is used as a buffer size ins strn.. functions,
96 /// and allocators.
97 static const char MsgTaintedBufferSize[];
98 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
99 CheckerContext &C) const;
100
101 /// Generate a report if the expression is tainted or points to tainted data.
102 bool generateReportIfTainted(const Expr *E, const char Msg[],
103 CheckerContext &C) const;
104
105
106 typedef SmallVector<unsigned, 2> ArgVector;
107
108 /// \brief A struct used to specify taint propagation rules for a function.
109 ///
110 /// If any of the possible taint source arguments is tainted, all of the
111 /// destination arguments should also be tainted. Use InvalidArgIndex in the
112 /// src list to specify that all of the arguments can introduce taint. Use
113 /// InvalidArgIndex in the dst arguments to signify that all the non-const
114 /// pointer and reference arguments might be tainted on return. If
115 /// ReturnValueIndex is added to the dst list, the return value will be
116 /// tainted.
117 struct TaintPropagationRule {
118 /// List of arguments which can be taint sources and should be checked.
119 ArgVector SrcArgs;
120 /// List of arguments which should be tainted on function return.
121 ArgVector DstArgs;
122 // TODO: Check if using other data structures would be more optimal.
123
TaintPropagationRule__anon32531e6c0111::GenericTaintChecker::TaintPropagationRule124 TaintPropagationRule() {}
125
TaintPropagationRule__anon32531e6c0111::GenericTaintChecker::TaintPropagationRule126 TaintPropagationRule(unsigned SArg,
127 unsigned DArg, bool TaintRet = false) {
128 SrcArgs.push_back(SArg);
129 DstArgs.push_back(DArg);
130 if (TaintRet)
131 DstArgs.push_back(ReturnValueIndex);
132 }
133
TaintPropagationRule__anon32531e6c0111::GenericTaintChecker::TaintPropagationRule134 TaintPropagationRule(unsigned SArg1, unsigned SArg2,
135 unsigned DArg, bool TaintRet = false) {
136 SrcArgs.push_back(SArg1);
137 SrcArgs.push_back(SArg2);
138 DstArgs.push_back(DArg);
139 if (TaintRet)
140 DstArgs.push_back(ReturnValueIndex);
141 }
142
143 /// Get the propagation rule for a given function.
144 static TaintPropagationRule
145 getTaintPropagationRule(const FunctionDecl *FDecl,
146 StringRef Name,
147 CheckerContext &C);
148
addSrcArg__anon32531e6c0111::GenericTaintChecker::TaintPropagationRule149 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
addDstArg__anon32531e6c0111::GenericTaintChecker::TaintPropagationRule150 inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
151
isNull__anon32531e6c0111::GenericTaintChecker::TaintPropagationRule152 inline bool isNull() const { return SrcArgs.empty(); }
153
isDestinationArgument__anon32531e6c0111::GenericTaintChecker::TaintPropagationRule154 inline bool isDestinationArgument(unsigned ArgNum) const {
155 return (std::find(DstArgs.begin(),
156 DstArgs.end(), ArgNum) != DstArgs.end());
157 }
158
isTaintedOrPointsToTainted__anon32531e6c0111::GenericTaintChecker::TaintPropagationRule159 static inline bool isTaintedOrPointsToTainted(const Expr *E,
160 ProgramStateRef State,
161 CheckerContext &C) {
162 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
163 (E->getType().getTypePtr()->isPointerType() &&
164 State->isTainted(getPointedToSymbol(C, E))));
165 }
166
167 /// \brief Pre-process a function which propagates taint according to the
168 /// taint rule.
169 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
170
171 };
172 };
173
// Namespace-scope definitions for the static constants which are declared
// (with their initializers) inside GenericTaintChecker.
const unsigned GenericTaintChecker::ReturnValueIndex;
const unsigned GenericTaintChecker::InvalidArgIndex;

// Diagnostic text used by checkUncontrolledFormatString.
const char GenericTaintChecker::MsgUncontrolledFormatString[] =
  "Untrusted data is used as a format string "
  "(CWE-134: Uncontrolled Format String)";

// Diagnostic text used by checkSystemCall.
const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
  "Untrusted data is passed to a system call "
  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";

// Diagnostic text used by checkTaintedBufferSize.
const char GenericTaintChecker::MsgTaintedBufferSize[] =
  "Untrusted data is used to specify the buffer size "
  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
  "character data and the null terminator)";
189
190 } // end of anonymous namespace
191
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. The values are unsigned integers, each of which is either
/// ReturnValueIndex or the index of a pointer/reference argument whose
/// pointed-to data should be tainted on return. The set is filled on the
/// pre-visit (TaintPropagationRule::process, preFscanf) and consumed and
/// removed from the state by propagateFromPre.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
197
198 GenericTaintChecker::TaintPropagationRule
199 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
200 const FunctionDecl *FDecl,
201 StringRef Name,
202 CheckerContext &C) {
203 // TODO: Currently, we might loose precision here: we always mark a return
204 // value as tainted even if it's just a pointer, pointing to tainted data.
205
206 // Check for exact name match for functions without builtin substitutes.
207 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
208 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
209 .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
210 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
211 .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
212 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
213 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
214 .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
215 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
216 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
217 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
218 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
219 .Case("read", TaintPropagationRule(0, 2, 1, true))
220 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
221 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
222 .Case("fgets", TaintPropagationRule(2, 0, true))
223 .Case("getline", TaintPropagationRule(2, 0))
224 .Case("getdelim", TaintPropagationRule(3, 0))
225 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
226 .Default(TaintPropagationRule());
227
228 if (!Rule.isNull())
229 return Rule;
230
231 // Check if it's one of the memory setting/copying functions.
232 // This check is specialized but faster then calling isCLibraryFunction.
233 unsigned BId = 0;
234 if ( (BId = FDecl->getMemoryFunctionKind()) )
235 switch(BId) {
236 case Builtin::BImemcpy:
237 case Builtin::BImemmove:
238 case Builtin::BIstrncpy:
239 case Builtin::BIstrncat:
240 return TaintPropagationRule(1, 2, 0, true);
241 case Builtin::BIstrlcpy:
242 case Builtin::BIstrlcat:
243 return TaintPropagationRule(1, 2, 0, false);
244 case Builtin::BIstrndup:
245 return TaintPropagationRule(0, 1, ReturnValueIndex);
246
247 default:
248 break;
249 };
250
251 // Process all other functions which could be defined as builtins.
252 if (Rule.isNull()) {
253 if (C.isCLibraryFunction(FDecl, "snprintf") ||
254 C.isCLibraryFunction(FDecl, "sprintf"))
255 return TaintPropagationRule(InvalidArgIndex, 0, true);
256 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
257 C.isCLibraryFunction(FDecl, "stpcpy") ||
258 C.isCLibraryFunction(FDecl, "strcat"))
259 return TaintPropagationRule(1, 0, true);
260 else if (C.isCLibraryFunction(FDecl, "bcopy"))
261 return TaintPropagationRule(0, 2, 1, false);
262 else if (C.isCLibraryFunction(FDecl, "strdup") ||
263 C.isCLibraryFunction(FDecl, "strdupa"))
264 return TaintPropagationRule(0, ReturnValueIndex);
265 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
266 return TaintPropagationRule(0, ReturnValueIndex);
267 }
268
269 // Skipping the following functions, since they might be used for cleansing
270 // or smart memory copy:
271 // - memccpy - copying until hitting a special character.
272
273 return TaintPropagationRule();
274 }
275
checkPreStmt(const CallExpr * CE,CheckerContext & C) const276 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
277 CheckerContext &C) const {
278 // Check for errors first.
279 if (checkPre(CE, C))
280 return;
281
282 // Add taint second.
283 addSourcesPre(CE, C);
284 }
285
checkPostStmt(const CallExpr * CE,CheckerContext & C) const286 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
287 CheckerContext &C) const {
288 if (propagateFromPre(CE, C))
289 return;
290 addSourcesPost(CE, C);
291 }
292
addSourcesPre(const CallExpr * CE,CheckerContext & C) const293 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
294 CheckerContext &C) const {
295 ProgramStateRef State = 0;
296 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
297 if (!FDecl || FDecl->getKind() != Decl::Function)
298 return;
299
300 StringRef Name = C.getCalleeName(FDecl);
301 if (Name.empty())
302 return;
303
304 // First, try generating a propagation rule for this function.
305 TaintPropagationRule Rule =
306 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
307 if (!Rule.isNull()) {
308 State = Rule.process(CE, C);
309 if (!State)
310 return;
311 C.addTransition(State);
312 return;
313 }
314
315 // Otherwise, check if we have custom pre-processing implemented.
316 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
317 .Case("fscanf", &GenericTaintChecker::preFscanf)
318 .Default(0);
319 // Check and evaluate the call.
320 if (evalFunction)
321 State = (this->*evalFunction)(CE, C);
322 if (!State)
323 return;
324 C.addTransition(State);
325
326 }
327
propagateFromPre(const CallExpr * CE,CheckerContext & C) const328 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
329 CheckerContext &C) const {
330 ProgramStateRef State = C.getState();
331
332 // Depending on what was tainted at pre-visit, we determined a set of
333 // arguments which should be tainted after the function returns. These are
334 // stored in the state as TaintArgsOnPostVisit set.
335 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
336 if (TaintArgs.isEmpty())
337 return false;
338
339 for (llvm::ImmutableSet<unsigned>::iterator
340 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
341 unsigned ArgNum = *I;
342
343 // Special handling for the tainted return value.
344 if (ArgNum == ReturnValueIndex) {
345 State = State->addTaint(CE, C.getLocationContext());
346 continue;
347 }
348
349 // The arguments are pointer arguments. The data they are pointing at is
350 // tainted after the call.
351 if (CE->getNumArgs() < (ArgNum + 1))
352 return false;
353 const Expr* Arg = CE->getArg(ArgNum);
354 SymbolRef Sym = getPointedToSymbol(C, Arg);
355 if (Sym)
356 State = State->addTaint(Sym);
357 }
358
359 // Clear up the taint info from the state.
360 State = State->remove<TaintArgsOnPostVisit>();
361
362 if (State != C.getState()) {
363 C.addTransition(State);
364 return true;
365 }
366 return false;
367 }
368
addSourcesPost(const CallExpr * CE,CheckerContext & C) const369 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
370 CheckerContext &C) const {
371 // Define the attack surface.
372 // Set the evaluation function by switching on the callee name.
373 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
374 if (!FDecl || FDecl->getKind() != Decl::Function)
375 return;
376
377 StringRef Name = C.getCalleeName(FDecl);
378 if (Name.empty())
379 return;
380 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
381 .Case("scanf", &GenericTaintChecker::postScanf)
382 // TODO: Add support for vfscanf & family.
383 .Case("getchar", &GenericTaintChecker::postRetTaint)
384 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
385 .Case("getenv", &GenericTaintChecker::postRetTaint)
386 .Case("fopen", &GenericTaintChecker::postRetTaint)
387 .Case("fdopen", &GenericTaintChecker::postRetTaint)
388 .Case("freopen", &GenericTaintChecker::postRetTaint)
389 .Case("getch", &GenericTaintChecker::postRetTaint)
390 .Case("wgetch", &GenericTaintChecker::postRetTaint)
391 .Case("socket", &GenericTaintChecker::postSocket)
392 .Default(0);
393
394 // If the callee isn't defined, it is not of security concern.
395 // Check and evaluate the call.
396 ProgramStateRef State = 0;
397 if (evalFunction)
398 State = (this->*evalFunction)(CE, C);
399 if (!State)
400 return;
401
402 C.addTransition(State);
403 }
404
checkPre(const CallExpr * CE,CheckerContext & C) const405 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
406
407 if (checkUncontrolledFormatString(CE, C))
408 return true;
409
410 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
411 if (!FDecl || FDecl->getKind() != Decl::Function)
412 return false;
413
414 StringRef Name = C.getCalleeName(FDecl);
415 if (Name.empty())
416 return false;
417
418 if (checkSystemCall(CE, Name, C))
419 return true;
420
421 if (checkTaintedBufferSize(CE, FDecl, C))
422 return true;
423
424 return false;
425 }
426
getPointedToSymbol(CheckerContext & C,const Expr * Arg)427 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
428 const Expr* Arg) {
429 ProgramStateRef State = C.getState();
430 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
431 if (AddrVal.isUnknownOrUndef())
432 return 0;
433
434 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
435 if (!AddrLoc)
436 return 0;
437
438 const PointerType *ArgTy =
439 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
440 SVal Val = State->getSVal(*AddrLoc,
441 ArgTy ? ArgTy->getPointeeType(): QualType());
442 return Val.getAsSymbol();
443 }
444
445 ProgramStateRef
process(const CallExpr * CE,CheckerContext & C) const446 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
447 CheckerContext &C) const {
448 ProgramStateRef State = C.getState();
449
450 // Check for taint in arguments.
451 bool IsTainted = false;
452 for (ArgVector::const_iterator I = SrcArgs.begin(),
453 E = SrcArgs.end(); I != E; ++I) {
454 unsigned ArgNum = *I;
455
456 if (ArgNum == InvalidArgIndex) {
457 // Check if any of the arguments is tainted, but skip the
458 // destination arguments.
459 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
460 if (isDestinationArgument(i))
461 continue;
462 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
463 break;
464 }
465 break;
466 }
467
468 if (CE->getNumArgs() < (ArgNum + 1))
469 return State;
470 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
471 break;
472 }
473 if (!IsTainted)
474 return State;
475
476 // Mark the arguments which should be tainted after the function returns.
477 for (ArgVector::const_iterator I = DstArgs.begin(),
478 E = DstArgs.end(); I != E; ++I) {
479 unsigned ArgNum = *I;
480
481 // Should we mark all arguments as tainted?
482 if (ArgNum == InvalidArgIndex) {
483 // For all pointer and references that were passed in:
484 // If they are not pointing to const data, mark data as tainted.
485 // TODO: So far we are just going one level down; ideally we'd need to
486 // recurse here.
487 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
488 const Expr *Arg = CE->getArg(i);
489 // Process pointer argument.
490 const Type *ArgTy = Arg->getType().getTypePtr();
491 QualType PType = ArgTy->getPointeeType();
492 if ((!PType.isNull() && !PType.isConstQualified())
493 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
494 State = State->add<TaintArgsOnPostVisit>(i);
495 }
496 continue;
497 }
498
499 // Should mark the return value?
500 if (ArgNum == ReturnValueIndex) {
501 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
502 continue;
503 }
504
505 // Mark the given argument.
506 assert(ArgNum < CE->getNumArgs());
507 State = State->add<TaintArgsOnPostVisit>(ArgNum);
508 }
509
510 return State;
511 }
512
513
514 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
515 // and arg 1 should get taint.
preFscanf(const CallExpr * CE,CheckerContext & C) const516 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
517 CheckerContext &C) const {
518 assert(CE->getNumArgs() >= 2);
519 ProgramStateRef State = C.getState();
520
521 // Check is the file descriptor is tainted.
522 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
523 isStdin(CE->getArg(0), C)) {
524 // All arguments except for the first two should get taint.
525 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
526 State = State->add<TaintArgsOnPostVisit>(i);
527 return State;
528 }
529
530 return 0;
531 }
532
533
534 // If argument 0(protocol domain) is network, the return value should get taint.
postSocket(const CallExpr * CE,CheckerContext & C) const535 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
536 CheckerContext &C) const {
537 ProgramStateRef State = C.getState();
538 if (CE->getNumArgs() < 3)
539 return State;
540
541 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
542 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
543 // White list the internal communication protocols.
544 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
545 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
546 return State;
547 State = State->addTaint(CE, C.getLocationContext());
548 return State;
549 }
550
postScanf(const CallExpr * CE,CheckerContext & C) const551 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
552 CheckerContext &C) const {
553 ProgramStateRef State = C.getState();
554 if (CE->getNumArgs() < 2)
555 return State;
556
557 // All arguments except for the very first one should get taint.
558 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
559 // The arguments are pointer arguments. The data they are pointing at is
560 // tainted after the call.
561 const Expr* Arg = CE->getArg(i);
562 SymbolRef Sym = getPointedToSymbol(C, Arg);
563 if (Sym)
564 State = State->addTaint(Sym);
565 }
566 return State;
567 }
568
postRetTaint(const CallExpr * CE,CheckerContext & C) const569 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
570 CheckerContext &C) const {
571 return C.getState()->addTaint(CE, C.getLocationContext());
572 }
573
isStdin(const Expr * E,CheckerContext & C)574 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
575 ProgramStateRef State = C.getState();
576 SVal Val = State->getSVal(E, C.getLocationContext());
577
578 // stdin is a pointer, so it would be a region.
579 const MemRegion *MemReg = Val.getAsRegion();
580
581 // The region should be symbolic, we do not know it's value.
582 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
583 if (!SymReg)
584 return false;
585
586 // Get it's symbol and find the declaration region it's pointing to.
587 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
588 if (!Sm)
589 return false;
590 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
591 if (!DeclReg)
592 return false;
593
594 // This region corresponds to a declaration, find out if it's a global/extern
595 // variable named stdin with the proper type.
596 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
597 D = D->getCanonicalDecl();
598 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
599 if (const PointerType * PtrTy =
600 dyn_cast<PointerType>(D->getType().getTypePtr()))
601 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
602 return true;
603 }
604 return false;
605 }
606
getPrintfFormatArgumentNum(const CallExpr * CE,const CheckerContext & C,unsigned int & ArgNum)607 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
608 const CheckerContext &C,
609 unsigned int &ArgNum) {
610 // Find if the function contains a format string argument.
611 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
612 // vsnprintf, syslog, custom annotated functions.
613 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
614 if (!FDecl)
615 return false;
616 for (specific_attr_iterator<FormatAttr>
617 i = FDecl->specific_attr_begin<FormatAttr>(),
618 e = FDecl->specific_attr_end<FormatAttr>(); i != e ; ++i) {
619
620 const FormatAttr *Format = *i;
621 ArgNum = Format->getFormatIdx() - 1;
622 if ((Format->getType() == "printf") && CE->getNumArgs() > ArgNum)
623 return true;
624 }
625
626 // Or if a function is named setproctitle (this is a heuristic).
627 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
628 ArgNum = 0;
629 return true;
630 }
631
632 return false;
633 }
634
generateReportIfTainted(const Expr * E,const char Msg[],CheckerContext & C) const635 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
636 const char Msg[],
637 CheckerContext &C) const {
638 assert(E);
639
640 // Check for taint.
641 ProgramStateRef State = C.getState();
642 if (!State->isTainted(getPointedToSymbol(C, E)) &&
643 !State->isTainted(E, C.getLocationContext()))
644 return false;
645
646 // Generate diagnostic.
647 if (ExplodedNode *N = C.addTransition()) {
648 initBugType();
649 BugReport *report = new BugReport(*BT, Msg, N);
650 report->addRange(E->getSourceRange());
651 C.emitReport(report);
652 return true;
653 }
654 return false;
655 }
656
checkUncontrolledFormatString(const CallExpr * CE,CheckerContext & C) const657 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
658 CheckerContext &C) const{
659 // Check if the function contains a format string argument.
660 unsigned int ArgNum = 0;
661 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
662 return false;
663
664 // If either the format string content or the pointer itself are tainted, warn.
665 if (generateReportIfTainted(CE->getArg(ArgNum),
666 MsgUncontrolledFormatString, C))
667 return true;
668 return false;
669 }
670
checkSystemCall(const CallExpr * CE,StringRef Name,CheckerContext & C) const671 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
672 StringRef Name,
673 CheckerContext &C) const {
674 // TODO: It might make sense to run this check on demand. In some cases,
675 // we should check if the environment has been cleansed here. We also might
676 // need to know if the user was reset before these calls(seteuid).
677 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
678 .Case("system", 0)
679 .Case("popen", 0)
680 .Case("execl", 0)
681 .Case("execle", 0)
682 .Case("execlp", 0)
683 .Case("execv", 0)
684 .Case("execvp", 0)
685 .Case("execvP", 0)
686 .Case("execve", 0)
687 .Case("dlopen", 0)
688 .Default(UINT_MAX);
689
690 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
691 return false;
692
693 if (generateReportIfTainted(CE->getArg(ArgNum),
694 MsgSanitizeSystemArgs, C))
695 return true;
696
697 return false;
698 }
699
700 // TODO: Should this check be a part of the CString checker?
701 // If yes, should taint be a global setting?
checkTaintedBufferSize(const CallExpr * CE,const FunctionDecl * FDecl,CheckerContext & C) const702 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
703 const FunctionDecl *FDecl,
704 CheckerContext &C) const {
705 // If the function has a buffer size argument, set ArgNum.
706 unsigned ArgNum = InvalidArgIndex;
707 unsigned BId = 0;
708 if ( (BId = FDecl->getMemoryFunctionKind()) )
709 switch(BId) {
710 case Builtin::BImemcpy:
711 case Builtin::BImemmove:
712 case Builtin::BIstrncpy:
713 ArgNum = 2;
714 break;
715 case Builtin::BIstrndup:
716 ArgNum = 1;
717 break;
718 default:
719 break;
720 };
721
722 if (ArgNum == InvalidArgIndex) {
723 if (C.isCLibraryFunction(FDecl, "malloc") ||
724 C.isCLibraryFunction(FDecl, "calloc") ||
725 C.isCLibraryFunction(FDecl, "alloca"))
726 ArgNum = 0;
727 else if (C.isCLibraryFunction(FDecl, "memccpy"))
728 ArgNum = 3;
729 else if (C.isCLibraryFunction(FDecl, "realloc"))
730 ArgNum = 1;
731 else if (C.isCLibraryFunction(FDecl, "bcopy"))
732 ArgNum = 2;
733 }
734
735 if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
736 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
737 return true;
738
739 return false;
740 }
741
/// Registers GenericTaintChecker with the given checker manager.
void ento::registerGenericTaintChecker(CheckerManager &mgr) {
  mgr.registerChecker<GenericTaintChecker>();
}
745