1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This checker defines the attack surface for generic taint propagation.
11 //
12 // The taint information produced by it might be useful to other checkers. For
13 // example, checkers should report errors which involve tainted data more
14 // aggressively, even if the involved symbols are under constrained.
15 //
16 //===----------------------------------------------------------------------===//
17 #include "ClangSACheckers.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/Basic/Builtins.h"
20 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21 #include "clang/StaticAnalyzer/Core/Checker.h"
22 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
25 #include <climits>
26
27 using namespace clang;
28 using namespace ento;
29
30 namespace {
31 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32 check::PreStmt<CallExpr> > {
33 public:
getTag()34 static void *getTag() { static int Tag; return &Tag; }
35
36 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37
38 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39
40 private:
41 static const unsigned InvalidArgIndex = UINT_MAX;
42 /// Denotes the return vale.
43 static const unsigned ReturnValueIndex = UINT_MAX - 1;
44
45 mutable std::unique_ptr<BugType> BT;
initBugType() const46 inline void initBugType() const {
47 if (!BT)
48 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
49 }
50
51 /// \brief Catch taint related bugs. Check if tainted data is passed to a
52 /// system call etc.
53 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54
55 /// \brief Add taint sources on a pre-visit.
56 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57
58 /// \brief Propagate taint generated at pre-visit.
59 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60
61 /// \brief Add taint sources on a post visit.
62 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63
64 /// Check if the region the expression evaluates to is the standard input,
65 /// and thus, is tainted.
66 static bool isStdin(const Expr *E, CheckerContext &C);
67
68 /// \brief Given a pointer argument, get the symbol of the value it contains
69 /// (points to).
70 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
71
72 /// Functions defining the attack surface.
73 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
74 CheckerContext &C) const;
75 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
78
79 /// Taint the scanned input if the file is tainted.
80 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
81
82 /// Check for CWE-134: Uncontrolled Format String.
83 static const char MsgUncontrolledFormatString[];
84 bool checkUncontrolledFormatString(const CallExpr *CE,
85 CheckerContext &C) const;
86
87 /// Check for:
88 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89 /// CWE-78, "Failure to Sanitize Data into an OS Command"
90 static const char MsgSanitizeSystemArgs[];
91 bool checkSystemCall(const CallExpr *CE, StringRef Name,
92 CheckerContext &C) const;
93
94 /// Check if tainted data is used as a buffer size ins strn.. functions,
95 /// and allocators.
96 static const char MsgTaintedBufferSize[];
97 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98 CheckerContext &C) const;
99
100 /// Generate a report if the expression is tainted or points to tainted data.
101 bool generateReportIfTainted(const Expr *E, const char Msg[],
102 CheckerContext &C) const;
103
104
105 typedef SmallVector<unsigned, 2> ArgVector;
106
107 /// \brief A struct used to specify taint propagation rules for a function.
108 ///
109 /// If any of the possible taint source arguments is tainted, all of the
110 /// destination arguments should also be tainted. Use InvalidArgIndex in the
111 /// src list to specify that all of the arguments can introduce taint. Use
112 /// InvalidArgIndex in the dst arguments to signify that all the non-const
113 /// pointer and reference arguments might be tainted on return. If
114 /// ReturnValueIndex is added to the dst list, the return value will be
115 /// tainted.
116 struct TaintPropagationRule {
117 /// List of arguments which can be taint sources and should be checked.
118 ArgVector SrcArgs;
119 /// List of arguments which should be tainted on function return.
120 ArgVector DstArgs;
121 // TODO: Check if using other data structures would be more optimal.
122
TaintPropagationRule__anon9e4f7a610111::GenericTaintChecker::TaintPropagationRule123 TaintPropagationRule() {}
124
TaintPropagationRule__anon9e4f7a610111::GenericTaintChecker::TaintPropagationRule125 TaintPropagationRule(unsigned SArg,
126 unsigned DArg, bool TaintRet = false) {
127 SrcArgs.push_back(SArg);
128 DstArgs.push_back(DArg);
129 if (TaintRet)
130 DstArgs.push_back(ReturnValueIndex);
131 }
132
TaintPropagationRule__anon9e4f7a610111::GenericTaintChecker::TaintPropagationRule133 TaintPropagationRule(unsigned SArg1, unsigned SArg2,
134 unsigned DArg, bool TaintRet = false) {
135 SrcArgs.push_back(SArg1);
136 SrcArgs.push_back(SArg2);
137 DstArgs.push_back(DArg);
138 if (TaintRet)
139 DstArgs.push_back(ReturnValueIndex);
140 }
141
142 /// Get the propagation rule for a given function.
143 static TaintPropagationRule
144 getTaintPropagationRule(const FunctionDecl *FDecl,
145 StringRef Name,
146 CheckerContext &C);
147
addSrcArg__anon9e4f7a610111::GenericTaintChecker::TaintPropagationRule148 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
addDstArg__anon9e4f7a610111::GenericTaintChecker::TaintPropagationRule149 inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
150
isNull__anon9e4f7a610111::GenericTaintChecker::TaintPropagationRule151 inline bool isNull() const { return SrcArgs.empty(); }
152
isDestinationArgument__anon9e4f7a610111::GenericTaintChecker::TaintPropagationRule153 inline bool isDestinationArgument(unsigned ArgNum) const {
154 return (std::find(DstArgs.begin(),
155 DstArgs.end(), ArgNum) != DstArgs.end());
156 }
157
isTaintedOrPointsToTainted__anon9e4f7a610111::GenericTaintChecker::TaintPropagationRule158 static inline bool isTaintedOrPointsToTainted(const Expr *E,
159 ProgramStateRef State,
160 CheckerContext &C) {
161 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
162 (E->getType().getTypePtr()->isPointerType() &&
163 State->isTainted(getPointedToSymbol(C, E))));
164 }
165
166 /// \brief Pre-process a function which propagates taint according to the
167 /// taint rule.
168 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
169
170 };
171 };
172
// Out-of-class definitions for the static constants whose values are given
// inside the class body.
const unsigned GenericTaintChecker::ReturnValueIndex;
const unsigned GenericTaintChecker::InvalidArgIndex;

// Diagnostic text used by checkUncontrolledFormatString().
const char GenericTaintChecker::MsgUncontrolledFormatString[] =
  "Untrusted data is used as a format string "
  "(CWE-134: Uncontrolled Format String)";

// Diagnostic text used by checkSystemCall().
const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
  "Untrusted data is passed to a system call "
  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";

// Diagnostic text used by checkTaintedBufferSize().
const char GenericTaintChecker::MsgTaintedBufferSize[] =
  "Untrusted data is used to specify the buffer size "
  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
  "character data and the null terminator)";
188
189 } // end of anonymous namespace
190
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. The values are unsigned integers, which are either
/// ReturnValueIndex, or indexes of the pointer/reference arguments which
/// point to data that should be tainted on return.
/// Entries are added by TaintPropagationRule::process() and preFscanf(), and
/// are read back by propagateFromPre().
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
196
197 GenericTaintChecker::TaintPropagationRule
198 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
199 const FunctionDecl *FDecl,
200 StringRef Name,
201 CheckerContext &C) {
202 // TODO: Currently, we might lose precision here: we always mark a return
203 // value as tainted even if it's just a pointer, pointing to tainted data.
204
205 // Check for exact name match for functions without builtin substitutes.
206 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
207 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
208 .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
209 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
210 .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
211 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
212 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
213 .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
214 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
215 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
216 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
217 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
218 .Case("read", TaintPropagationRule(0, 2, 1, true))
219 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
220 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
221 .Case("fgets", TaintPropagationRule(2, 0, true))
222 .Case("getline", TaintPropagationRule(2, 0))
223 .Case("getdelim", TaintPropagationRule(3, 0))
224 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
225 .Default(TaintPropagationRule());
226
227 if (!Rule.isNull())
228 return Rule;
229
230 // Check if it's one of the memory setting/copying functions.
231 // This check is specialized but faster then calling isCLibraryFunction.
232 unsigned BId = 0;
233 if ( (BId = FDecl->getMemoryFunctionKind()) )
234 switch(BId) {
235 case Builtin::BImemcpy:
236 case Builtin::BImemmove:
237 case Builtin::BIstrncpy:
238 case Builtin::BIstrncat:
239 return TaintPropagationRule(1, 2, 0, true);
240 case Builtin::BIstrlcpy:
241 case Builtin::BIstrlcat:
242 return TaintPropagationRule(1, 2, 0, false);
243 case Builtin::BIstrndup:
244 return TaintPropagationRule(0, 1, ReturnValueIndex);
245
246 default:
247 break;
248 };
249
250 // Process all other functions which could be defined as builtins.
251 if (Rule.isNull()) {
252 if (C.isCLibraryFunction(FDecl, "snprintf") ||
253 C.isCLibraryFunction(FDecl, "sprintf"))
254 return TaintPropagationRule(InvalidArgIndex, 0, true);
255 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
256 C.isCLibraryFunction(FDecl, "stpcpy") ||
257 C.isCLibraryFunction(FDecl, "strcat"))
258 return TaintPropagationRule(1, 0, true);
259 else if (C.isCLibraryFunction(FDecl, "bcopy"))
260 return TaintPropagationRule(0, 2, 1, false);
261 else if (C.isCLibraryFunction(FDecl, "strdup") ||
262 C.isCLibraryFunction(FDecl, "strdupa"))
263 return TaintPropagationRule(0, ReturnValueIndex);
264 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
265 return TaintPropagationRule(0, ReturnValueIndex);
266 }
267
268 // Skipping the following functions, since they might be used for cleansing
269 // or smart memory copy:
270 // - memccpy - copying until hitting a special character.
271
272 return TaintPropagationRule();
273 }
274
checkPreStmt(const CallExpr * CE,CheckerContext & C) const275 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
276 CheckerContext &C) const {
277 // Check for errors first.
278 if (checkPre(CE, C))
279 return;
280
281 // Add taint second.
282 addSourcesPre(CE, C);
283 }
284
checkPostStmt(const CallExpr * CE,CheckerContext & C) const285 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
286 CheckerContext &C) const {
287 if (propagateFromPre(CE, C))
288 return;
289 addSourcesPost(CE, C);
290 }
291
addSourcesPre(const CallExpr * CE,CheckerContext & C) const292 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
293 CheckerContext &C) const {
294 ProgramStateRef State = nullptr;
295 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
296 if (!FDecl || FDecl->getKind() != Decl::Function)
297 return;
298
299 StringRef Name = C.getCalleeName(FDecl);
300 if (Name.empty())
301 return;
302
303 // First, try generating a propagation rule for this function.
304 TaintPropagationRule Rule =
305 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
306 if (!Rule.isNull()) {
307 State = Rule.process(CE, C);
308 if (!State)
309 return;
310 C.addTransition(State);
311 return;
312 }
313
314 // Otherwise, check if we have custom pre-processing implemented.
315 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
316 .Case("fscanf", &GenericTaintChecker::preFscanf)
317 .Default(nullptr);
318 // Check and evaluate the call.
319 if (evalFunction)
320 State = (this->*evalFunction)(CE, C);
321 if (!State)
322 return;
323 C.addTransition(State);
324
325 }
326
propagateFromPre(const CallExpr * CE,CheckerContext & C) const327 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
328 CheckerContext &C) const {
329 ProgramStateRef State = C.getState();
330
331 // Depending on what was tainted at pre-visit, we determined a set of
332 // arguments which should be tainted after the function returns. These are
333 // stored in the state as TaintArgsOnPostVisit set.
334 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
335 if (TaintArgs.isEmpty())
336 return false;
337
338 for (llvm::ImmutableSet<unsigned>::iterator
339 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
340 unsigned ArgNum = *I;
341
342 // Special handling for the tainted return value.
343 if (ArgNum == ReturnValueIndex) {
344 State = State->addTaint(CE, C.getLocationContext());
345 continue;
346 }
347
348 // The arguments are pointer arguments. The data they are pointing at is
349 // tainted after the call.
350 if (CE->getNumArgs() < (ArgNum + 1))
351 return false;
352 const Expr* Arg = CE->getArg(ArgNum);
353 SymbolRef Sym = getPointedToSymbol(C, Arg);
354 if (Sym)
355 State = State->addTaint(Sym);
356 }
357
358 // Clear up the taint info from the state.
359 State = State->remove<TaintArgsOnPostVisit>();
360
361 if (State != C.getState()) {
362 C.addTransition(State);
363 return true;
364 }
365 return false;
366 }
367
addSourcesPost(const CallExpr * CE,CheckerContext & C) const368 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
369 CheckerContext &C) const {
370 // Define the attack surface.
371 // Set the evaluation function by switching on the callee name.
372 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
373 if (!FDecl || FDecl->getKind() != Decl::Function)
374 return;
375
376 StringRef Name = C.getCalleeName(FDecl);
377 if (Name.empty())
378 return;
379 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
380 .Case("scanf", &GenericTaintChecker::postScanf)
381 // TODO: Add support for vfscanf & family.
382 .Case("getchar", &GenericTaintChecker::postRetTaint)
383 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
384 .Case("getenv", &GenericTaintChecker::postRetTaint)
385 .Case("fopen", &GenericTaintChecker::postRetTaint)
386 .Case("fdopen", &GenericTaintChecker::postRetTaint)
387 .Case("freopen", &GenericTaintChecker::postRetTaint)
388 .Case("getch", &GenericTaintChecker::postRetTaint)
389 .Case("wgetch", &GenericTaintChecker::postRetTaint)
390 .Case("socket", &GenericTaintChecker::postSocket)
391 .Default(nullptr);
392
393 // If the callee isn't defined, it is not of security concern.
394 // Check and evaluate the call.
395 ProgramStateRef State = nullptr;
396 if (evalFunction)
397 State = (this->*evalFunction)(CE, C);
398 if (!State)
399 return;
400
401 C.addTransition(State);
402 }
403
checkPre(const CallExpr * CE,CheckerContext & C) const404 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
405
406 if (checkUncontrolledFormatString(CE, C))
407 return true;
408
409 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
410 if (!FDecl || FDecl->getKind() != Decl::Function)
411 return false;
412
413 StringRef Name = C.getCalleeName(FDecl);
414 if (Name.empty())
415 return false;
416
417 if (checkSystemCall(CE, Name, C))
418 return true;
419
420 if (checkTaintedBufferSize(CE, FDecl, C))
421 return true;
422
423 return false;
424 }
425
getPointedToSymbol(CheckerContext & C,const Expr * Arg)426 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
427 const Expr* Arg) {
428 ProgramStateRef State = C.getState();
429 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
430 if (AddrVal.isUnknownOrUndef())
431 return nullptr;
432
433 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
434 if (!AddrLoc)
435 return nullptr;
436
437 const PointerType *ArgTy =
438 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
439 SVal Val = State->getSVal(*AddrLoc,
440 ArgTy ? ArgTy->getPointeeType(): QualType());
441 return Val.getAsSymbol();
442 }
443
444 ProgramStateRef
process(const CallExpr * CE,CheckerContext & C) const445 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
446 CheckerContext &C) const {
447 ProgramStateRef State = C.getState();
448
449 // Check for taint in arguments.
450 bool IsTainted = false;
451 for (ArgVector::const_iterator I = SrcArgs.begin(),
452 E = SrcArgs.end(); I != E; ++I) {
453 unsigned ArgNum = *I;
454
455 if (ArgNum == InvalidArgIndex) {
456 // Check if any of the arguments is tainted, but skip the
457 // destination arguments.
458 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
459 if (isDestinationArgument(i))
460 continue;
461 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
462 break;
463 }
464 break;
465 }
466
467 if (CE->getNumArgs() < (ArgNum + 1))
468 return State;
469 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
470 break;
471 }
472 if (!IsTainted)
473 return State;
474
475 // Mark the arguments which should be tainted after the function returns.
476 for (ArgVector::const_iterator I = DstArgs.begin(),
477 E = DstArgs.end(); I != E; ++I) {
478 unsigned ArgNum = *I;
479
480 // Should we mark all arguments as tainted?
481 if (ArgNum == InvalidArgIndex) {
482 // For all pointer and references that were passed in:
483 // If they are not pointing to const data, mark data as tainted.
484 // TODO: So far we are just going one level down; ideally we'd need to
485 // recurse here.
486 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
487 const Expr *Arg = CE->getArg(i);
488 // Process pointer argument.
489 const Type *ArgTy = Arg->getType().getTypePtr();
490 QualType PType = ArgTy->getPointeeType();
491 if ((!PType.isNull() && !PType.isConstQualified())
492 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
493 State = State->add<TaintArgsOnPostVisit>(i);
494 }
495 continue;
496 }
497
498 // Should mark the return value?
499 if (ArgNum == ReturnValueIndex) {
500 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
501 continue;
502 }
503
504 // Mark the given argument.
505 assert(ArgNum < CE->getNumArgs());
506 State = State->add<TaintArgsOnPostVisit>(ArgNum);
507 }
508
509 return State;
510 }
511
512
513 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
514 // and arg 1 should get taint.
preFscanf(const CallExpr * CE,CheckerContext & C) const515 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
516 CheckerContext &C) const {
517 assert(CE->getNumArgs() >= 2);
518 ProgramStateRef State = C.getState();
519
520 // Check is the file descriptor is tainted.
521 if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
522 isStdin(CE->getArg(0), C)) {
523 // All arguments except for the first two should get taint.
524 for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
525 State = State->add<TaintArgsOnPostVisit>(i);
526 return State;
527 }
528
529 return nullptr;
530 }
531
532
533 // If argument 0(protocol domain) is network, the return value should get taint.
postSocket(const CallExpr * CE,CheckerContext & C) const534 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
535 CheckerContext &C) const {
536 ProgramStateRef State = C.getState();
537 if (CE->getNumArgs() < 3)
538 return State;
539
540 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
541 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
542 // White list the internal communication protocols.
543 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
544 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
545 return State;
546 State = State->addTaint(CE, C.getLocationContext());
547 return State;
548 }
549
postScanf(const CallExpr * CE,CheckerContext & C) const550 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
551 CheckerContext &C) const {
552 ProgramStateRef State = C.getState();
553 if (CE->getNumArgs() < 2)
554 return State;
555
556 // All arguments except for the very first one should get taint.
557 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
558 // The arguments are pointer arguments. The data they are pointing at is
559 // tainted after the call.
560 const Expr* Arg = CE->getArg(i);
561 SymbolRef Sym = getPointedToSymbol(C, Arg);
562 if (Sym)
563 State = State->addTaint(Sym);
564 }
565 return State;
566 }
567
postRetTaint(const CallExpr * CE,CheckerContext & C) const568 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
569 CheckerContext &C) const {
570 return C.getState()->addTaint(CE, C.getLocationContext());
571 }
572
isStdin(const Expr * E,CheckerContext & C)573 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
574 ProgramStateRef State = C.getState();
575 SVal Val = State->getSVal(E, C.getLocationContext());
576
577 // stdin is a pointer, so it would be a region.
578 const MemRegion *MemReg = Val.getAsRegion();
579
580 // The region should be symbolic, we do not know it's value.
581 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
582 if (!SymReg)
583 return false;
584
585 // Get it's symbol and find the declaration region it's pointing to.
586 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
587 if (!Sm)
588 return false;
589 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
590 if (!DeclReg)
591 return false;
592
593 // This region corresponds to a declaration, find out if it's a global/extern
594 // variable named stdin with the proper type.
595 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
596 D = D->getCanonicalDecl();
597 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
598 if (const PointerType * PtrTy =
599 dyn_cast<PointerType>(D->getType().getTypePtr()))
600 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
601 return true;
602 }
603 return false;
604 }
605
getPrintfFormatArgumentNum(const CallExpr * CE,const CheckerContext & C,unsigned int & ArgNum)606 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
607 const CheckerContext &C,
608 unsigned int &ArgNum) {
609 // Find if the function contains a format string argument.
610 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
611 // vsnprintf, syslog, custom annotated functions.
612 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
613 if (!FDecl)
614 return false;
615 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
616 ArgNum = Format->getFormatIdx() - 1;
617 if ((Format->getType()->getName() == "printf") &&
618 CE->getNumArgs() > ArgNum)
619 return true;
620 }
621
622 // Or if a function is named setproctitle (this is a heuristic).
623 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
624 ArgNum = 0;
625 return true;
626 }
627
628 return false;
629 }
630
generateReportIfTainted(const Expr * E,const char Msg[],CheckerContext & C) const631 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
632 const char Msg[],
633 CheckerContext &C) const {
634 assert(E);
635
636 // Check for taint.
637 ProgramStateRef State = C.getState();
638 if (!State->isTainted(getPointedToSymbol(C, E)) &&
639 !State->isTainted(E, C.getLocationContext()))
640 return false;
641
642 // Generate diagnostic.
643 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
644 initBugType();
645 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
646 report->addRange(E->getSourceRange());
647 C.emitReport(std::move(report));
648 return true;
649 }
650 return false;
651 }
652
checkUncontrolledFormatString(const CallExpr * CE,CheckerContext & C) const653 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
654 CheckerContext &C) const{
655 // Check if the function contains a format string argument.
656 unsigned int ArgNum = 0;
657 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
658 return false;
659
660 // If either the format string content or the pointer itself are tainted, warn.
661 return generateReportIfTainted(CE->getArg(ArgNum),
662 MsgUncontrolledFormatString, C);
663 }
664
checkSystemCall(const CallExpr * CE,StringRef Name,CheckerContext & C) const665 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
666 StringRef Name,
667 CheckerContext &C) const {
668 // TODO: It might make sense to run this check on demand. In some cases,
669 // we should check if the environment has been cleansed here. We also might
670 // need to know if the user was reset before these calls(seteuid).
671 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
672 .Case("system", 0)
673 .Case("popen", 0)
674 .Case("execl", 0)
675 .Case("execle", 0)
676 .Case("execlp", 0)
677 .Case("execv", 0)
678 .Case("execvp", 0)
679 .Case("execvP", 0)
680 .Case("execve", 0)
681 .Case("dlopen", 0)
682 .Default(UINT_MAX);
683
684 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
685 return false;
686
687 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
688 }
689
690 // TODO: Should this check be a part of the CString checker?
691 // If yes, should taint be a global setting?
checkTaintedBufferSize(const CallExpr * CE,const FunctionDecl * FDecl,CheckerContext & C) const692 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
693 const FunctionDecl *FDecl,
694 CheckerContext &C) const {
695 // If the function has a buffer size argument, set ArgNum.
696 unsigned ArgNum = InvalidArgIndex;
697 unsigned BId = 0;
698 if ( (BId = FDecl->getMemoryFunctionKind()) )
699 switch(BId) {
700 case Builtin::BImemcpy:
701 case Builtin::BImemmove:
702 case Builtin::BIstrncpy:
703 ArgNum = 2;
704 break;
705 case Builtin::BIstrndup:
706 ArgNum = 1;
707 break;
708 default:
709 break;
710 };
711
712 if (ArgNum == InvalidArgIndex) {
713 if (C.isCLibraryFunction(FDecl, "malloc") ||
714 C.isCLibraryFunction(FDecl, "calloc") ||
715 C.isCLibraryFunction(FDecl, "alloca"))
716 ArgNum = 0;
717 else if (C.isCLibraryFunction(FDecl, "memccpy"))
718 ArgNum = 3;
719 else if (C.isCLibraryFunction(FDecl, "realloc"))
720 ArgNum = 1;
721 else if (C.isCLibraryFunction(FDecl, "bcopy"))
722 ArgNum = 2;
723 }
724
725 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
726 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
727 }
728
registerGenericTaintChecker(CheckerManager & mgr)729 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
730 mgr.registerChecker<GenericTaintChecker>();
731 }
732