• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19 #include "clang/StaticAnalyzer/Core/Checker.h"
20 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringSwitch.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace clang;
29 using namespace ento;
30 
31 namespace {
32 class CStringChecker : public Checker< eval::Call,
33                                          check::PreStmt<DeclStmt>,
34                                          check::LiveSymbols,
35                                          check::DeadSymbols,
36                                          check::RegionChanges
37                                          > {
38   mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
39       BT_NotCString, BT_AdditionOverflow;
40 
41   mutable const char *CurrentFunctionDescription;
42 
43 public:
44   /// The filter is used to filter out the diagnostics which are not enabled by
45   /// the user.
46   struct CStringChecksFilter {
47     DefaultBool CheckCStringNullArg;
48     DefaultBool CheckCStringOutOfBounds;
49     DefaultBool CheckCStringBufferOverlap;
50     DefaultBool CheckCStringNotNullTerm;
51 
52     CheckName CheckNameCStringNullArg;
53     CheckName CheckNameCStringOutOfBounds;
54     CheckName CheckNameCStringBufferOverlap;
55     CheckName CheckNameCStringNotNullTerm;
56   };
57 
58   CStringChecksFilter Filter;
59 
getTag()60   static void *getTag() { static int tag; return &tag; }
61 
62   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
63   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
64   void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
65   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
66   bool wantsRegionChangeUpdate(ProgramStateRef state) const;
67 
68   ProgramStateRef
69     checkRegionChanges(ProgramStateRef state,
70                        const InvalidatedSymbols *,
71                        ArrayRef<const MemRegion *> ExplicitRegions,
72                        ArrayRef<const MemRegion *> Regions,
73                        const CallEvent *Call) const;
74 
75   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
76                                           const CallExpr *) const;
77 
78   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
79   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
80   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
81   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
82   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
83                       ProgramStateRef state,
84                       const Expr *Size,
85                       const Expr *Source,
86                       const Expr *Dest,
87                       bool Restricted = false,
88                       bool IsMempcpy = false) const;
89 
90   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
91 
92   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
93   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
94   void evalstrLengthCommon(CheckerContext &C,
95                            const CallExpr *CE,
96                            bool IsStrnlen = false) const;
97 
98   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
99   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
100   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
101   void evalStrcpyCommon(CheckerContext &C,
102                         const CallExpr *CE,
103                         bool returnEnd,
104                         bool isBounded,
105                         bool isAppending) const;
106 
107   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
108   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
109 
110   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
111   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
112   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
113   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
114   void evalStrcmpCommon(CheckerContext &C,
115                         const CallExpr *CE,
116                         bool isBounded = false,
117                         bool ignoreCase = false) const;
118 
119   void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
120 
121   // Utility methods
122   std::pair<ProgramStateRef , ProgramStateRef >
123   static assumeZero(CheckerContext &C,
124                     ProgramStateRef state, SVal V, QualType Ty);
125 
126   static ProgramStateRef setCStringLength(ProgramStateRef state,
127                                               const MemRegion *MR,
128                                               SVal strLength);
129   static SVal getCStringLengthForRegion(CheckerContext &C,
130                                         ProgramStateRef &state,
131                                         const Expr *Ex,
132                                         const MemRegion *MR,
133                                         bool hypothetical);
134   SVal getCStringLength(CheckerContext &C,
135                         ProgramStateRef &state,
136                         const Expr *Ex,
137                         SVal Buf,
138                         bool hypothetical = false) const;
139 
140   const StringLiteral *getCStringLiteral(CheckerContext &C,
141                                          ProgramStateRef &state,
142                                          const Expr *expr,
143                                          SVal val) const;
144 
145   static ProgramStateRef InvalidateBuffer(CheckerContext &C,
146                                           ProgramStateRef state,
147                                           const Expr *Ex, SVal V,
148                                           bool IsSourceBuffer);
149 
150   static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
151                               const MemRegion *MR);
152 
153   // Re-usable checks
154   ProgramStateRef checkNonNull(CheckerContext &C,
155                                    ProgramStateRef state,
156                                    const Expr *S,
157                                    SVal l) const;
158   ProgramStateRef CheckLocation(CheckerContext &C,
159                                     ProgramStateRef state,
160                                     const Expr *S,
161                                     SVal l,
162                                     const char *message = nullptr) const;
163   ProgramStateRef CheckBufferAccess(CheckerContext &C,
164                                         ProgramStateRef state,
165                                         const Expr *Size,
166                                         const Expr *FirstBuf,
167                                         const Expr *SecondBuf,
168                                         const char *firstMessage = nullptr,
169                                         const char *secondMessage = nullptr,
170                                         bool WarnAboutSize = false) const;
171 
CheckBufferAccess(CheckerContext & C,ProgramStateRef state,const Expr * Size,const Expr * Buf,const char * message=nullptr,bool WarnAboutSize=false) const172   ProgramStateRef CheckBufferAccess(CheckerContext &C,
173                                         ProgramStateRef state,
174                                         const Expr *Size,
175                                         const Expr *Buf,
176                                         const char *message = nullptr,
177                                         bool WarnAboutSize = false) const {
178     // This is a convenience override.
179     return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr,
180                              WarnAboutSize);
181   }
182   ProgramStateRef CheckOverlap(CheckerContext &C,
183                                    ProgramStateRef state,
184                                    const Expr *Size,
185                                    const Expr *First,
186                                    const Expr *Second) const;
187   void emitOverlapBug(CheckerContext &C,
188                       ProgramStateRef state,
189                       const Stmt *First,
190                       const Stmt *Second) const;
191 
192   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
193                                             ProgramStateRef state,
194                                             NonLoc left,
195                                             NonLoc right) const;
196 };
197 
198 } //end anonymous namespace
199 
// Program-state map from a memory region to the SVal recorded as the length
// of the C string stored in that region.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
201 
202 //===----------------------------------------------------------------------===//
203 // Individual checks and utility methods.
204 //===----------------------------------------------------------------------===//
205 
206 std::pair<ProgramStateRef , ProgramStateRef >
207 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
208                            QualType Ty) {
209   Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
210   if (!val)
211     return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
212 
213   SValBuilder &svalBuilder = C.getSValBuilder();
214   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
215   return state->assume(svalBuilder.evalEQ(state, *val, zero));
216 }
217 
checkNonNull(CheckerContext & C,ProgramStateRef state,const Expr * S,SVal l) const218 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
219                                             ProgramStateRef state,
220                                             const Expr *S, SVal l) const {
221   // If a previous check has failed, propagate the failure.
222   if (!state)
223     return nullptr;
224 
225   ProgramStateRef stateNull, stateNonNull;
226   std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
227 
228   if (stateNull && !stateNonNull) {
229     if (!Filter.CheckCStringNullArg)
230       return nullptr;
231 
232     ExplodedNode *N = C.generateSink(stateNull);
233     if (!N)
234       return nullptr;
235 
236     if (!BT_Null)
237       BT_Null.reset(new BuiltinBug(
238           Filter.CheckNameCStringNullArg, categories::UnixAPI,
239           "Null pointer argument in call to byte string function"));
240 
241     SmallString<80> buf;
242     llvm::raw_svector_ostream os(buf);
243     assert(CurrentFunctionDescription);
244     os << "Null pointer argument in call to " << CurrentFunctionDescription;
245 
246     // Generate a report for this bug.
247     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
248     BugReport *report = new BugReport(*BT, os.str(), N);
249 
250     report->addRange(S->getSourceRange());
251     bugreporter::trackNullOrUndefValue(N, S, *report);
252     C.emitReport(report);
253     return nullptr;
254   }
255 
256   // From here on, assume that the value is non-null.
257   assert(stateNonNull);
258   return stateNonNull;
259 }
260 
261 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
CheckLocation(CheckerContext & C,ProgramStateRef state,const Expr * S,SVal l,const char * warningMsg) const262 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
263                                              ProgramStateRef state,
264                                              const Expr *S, SVal l,
265                                              const char *warningMsg) const {
266   // If a previous check has failed, propagate the failure.
267   if (!state)
268     return nullptr;
269 
270   // Check for out of bound array element access.
271   const MemRegion *R = l.getAsRegion();
272   if (!R)
273     return state;
274 
275   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
276   if (!ER)
277     return state;
278 
279   assert(ER->getValueType() == C.getASTContext().CharTy &&
280     "CheckLocation should only be called with char* ElementRegions");
281 
282   // Get the size of the array.
283   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
284   SValBuilder &svalBuilder = C.getSValBuilder();
285   SVal Extent =
286     svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
287   DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>();
288 
289   // Get the index of the accessed element.
290   DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
291 
292   ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
293   ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
294   if (StOutBound && !StInBound) {
295     ExplodedNode *N = C.generateSink(StOutBound);
296     if (!N)
297       return nullptr;
298 
299     if (!BT_Bounds) {
300       BT_Bounds.reset(new BuiltinBug(
301           Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access",
302           "Byte string function accesses out-of-bound array element"));
303     }
304     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
305 
306     // Generate a report for this bug.
307     BugReport *report;
308     if (warningMsg) {
309       report = new BugReport(*BT, warningMsg, N);
310     } else {
311       assert(CurrentFunctionDescription);
312       assert(CurrentFunctionDescription[0] != '\0');
313 
314       SmallString<80> buf;
315       llvm::raw_svector_ostream os(buf);
316       os << toUppercase(CurrentFunctionDescription[0])
317          << &CurrentFunctionDescription[1]
318          << " accesses out-of-bound array element";
319       report = new BugReport(*BT, os.str(), N);
320     }
321 
322     // FIXME: It would be nice to eventually make this diagnostic more clear,
323     // e.g., by referencing the original declaration or by saying *why* this
324     // reference is outside the range.
325 
326     report->addRange(S->getSourceRange());
327     C.emitReport(report);
328     return nullptr;
329   }
330 
331   // Array bound check succeeded.  From this point forward the array bound
332   // should always succeed.
333   return StInBound;
334 }
335 
CheckBufferAccess(CheckerContext & C,ProgramStateRef state,const Expr * Size,const Expr * FirstBuf,const Expr * SecondBuf,const char * firstMessage,const char * secondMessage,bool WarnAboutSize) const336 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
337                                                  ProgramStateRef state,
338                                                  const Expr *Size,
339                                                  const Expr *FirstBuf,
340                                                  const Expr *SecondBuf,
341                                                  const char *firstMessage,
342                                                  const char *secondMessage,
343                                                  bool WarnAboutSize) const {
344   // If a previous check has failed, propagate the failure.
345   if (!state)
346     return nullptr;
347 
348   SValBuilder &svalBuilder = C.getSValBuilder();
349   ASTContext &Ctx = svalBuilder.getContext();
350   const LocationContext *LCtx = C.getLocationContext();
351 
352   QualType sizeTy = Size->getType();
353   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
354 
355   // Check that the first buffer is non-null.
356   SVal BufVal = state->getSVal(FirstBuf, LCtx);
357   state = checkNonNull(C, state, FirstBuf, BufVal);
358   if (!state)
359     return nullptr;
360 
361   // If out-of-bounds checking is turned off, skip the rest.
362   if (!Filter.CheckCStringOutOfBounds)
363     return state;
364 
365   // Get the access length and make sure it is known.
366   // FIXME: This assumes the caller has already checked that the access length
367   // is positive. And that it's unsigned.
368   SVal LengthVal = state->getSVal(Size, LCtx);
369   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
370   if (!Length)
371     return state;
372 
373   // Compute the offset of the last element to be accessed: size-1.
374   NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
375   NonLoc LastOffset = svalBuilder
376       .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>();
377 
378   // Check that the first buffer is sufficiently long.
379   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
380   if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
381     const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
382 
383     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
384                                           LastOffset, PtrTy);
385     state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
386 
387     // If the buffer isn't large enough, abort.
388     if (!state)
389       return nullptr;
390   }
391 
392   // If there's a second buffer, check it as well.
393   if (SecondBuf) {
394     BufVal = state->getSVal(SecondBuf, LCtx);
395     state = checkNonNull(C, state, SecondBuf, BufVal);
396     if (!state)
397       return nullptr;
398 
399     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
400     if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
401       const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
402 
403       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
404                                             LastOffset, PtrTy);
405       state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
406     }
407   }
408 
409   // Large enough or not, return this state!
410   return state;
411 }
412 
CheckOverlap(CheckerContext & C,ProgramStateRef state,const Expr * Size,const Expr * First,const Expr * Second) const413 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
414                                             ProgramStateRef state,
415                                             const Expr *Size,
416                                             const Expr *First,
417                                             const Expr *Second) const {
418   if (!Filter.CheckCStringBufferOverlap)
419     return state;
420 
421   // Do a simple check for overlap: if the two arguments are from the same
422   // buffer, see if the end of the first is greater than the start of the second
423   // or vice versa.
424 
425   // If a previous check has failed, propagate the failure.
426   if (!state)
427     return nullptr;
428 
429   ProgramStateRef stateTrue, stateFalse;
430 
431   // Get the buffer values and make sure they're known locations.
432   const LocationContext *LCtx = C.getLocationContext();
433   SVal firstVal = state->getSVal(First, LCtx);
434   SVal secondVal = state->getSVal(Second, LCtx);
435 
436   Optional<Loc> firstLoc = firstVal.getAs<Loc>();
437   if (!firstLoc)
438     return state;
439 
440   Optional<Loc> secondLoc = secondVal.getAs<Loc>();
441   if (!secondLoc)
442     return state;
443 
444   // Are the two values the same?
445   SValBuilder &svalBuilder = C.getSValBuilder();
446   std::tie(stateTrue, stateFalse) =
447     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
448 
449   if (stateTrue && !stateFalse) {
450     // If the values are known to be equal, that's automatically an overlap.
451     emitOverlapBug(C, stateTrue, First, Second);
452     return nullptr;
453   }
454 
455   // assume the two expressions are not equal.
456   assert(stateFalse);
457   state = stateFalse;
458 
459   // Which value comes first?
460   QualType cmpTy = svalBuilder.getConditionType();
461   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
462                                          *firstLoc, *secondLoc, cmpTy);
463   Optional<DefinedOrUnknownSVal> reverseTest =
464       reverse.getAs<DefinedOrUnknownSVal>();
465   if (!reverseTest)
466     return state;
467 
468   std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
469   if (stateTrue) {
470     if (stateFalse) {
471       // If we don't know which one comes first, we can't perform this test.
472       return state;
473     } else {
474       // Switch the values so that firstVal is before secondVal.
475       std::swap(firstLoc, secondLoc);
476 
477       // Switch the Exprs as well, so that they still correspond.
478       std::swap(First, Second);
479     }
480   }
481 
482   // Get the length, and make sure it too is known.
483   SVal LengthVal = state->getSVal(Size, LCtx);
484   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
485   if (!Length)
486     return state;
487 
488   // Convert the first buffer's start address to char*.
489   // Bail out if the cast fails.
490   ASTContext &Ctx = svalBuilder.getContext();
491   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
492   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
493                                          First->getType());
494   Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
495   if (!FirstStartLoc)
496     return state;
497 
498   // Compute the end of the first buffer. Bail out if THAT fails.
499   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
500                                  *FirstStartLoc, *Length, CharPtrTy);
501   Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
502   if (!FirstEndLoc)
503     return state;
504 
505   // Is the end of the first buffer past the start of the second buffer?
506   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
507                                 *FirstEndLoc, *secondLoc, cmpTy);
508   Optional<DefinedOrUnknownSVal> OverlapTest =
509       Overlap.getAs<DefinedOrUnknownSVal>();
510   if (!OverlapTest)
511     return state;
512 
513   std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
514 
515   if (stateTrue && !stateFalse) {
516     // Overlap!
517     emitOverlapBug(C, stateTrue, First, Second);
518     return nullptr;
519   }
520 
521   // assume the two expressions don't overlap.
522   assert(stateFalse);
523   return stateFalse;
524 }
525 
emitOverlapBug(CheckerContext & C,ProgramStateRef state,const Stmt * First,const Stmt * Second) const526 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
527                                   const Stmt *First, const Stmt *Second) const {
528   ExplodedNode *N = C.generateSink(state);
529   if (!N)
530     return;
531 
532   if (!BT_Overlap)
533     BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
534                                  categories::UnixAPI, "Improper arguments"));
535 
536   // Generate a report for this bug.
537   BugReport *report =
538     new BugReport(*BT_Overlap,
539       "Arguments must not be overlapping buffers", N);
540   report->addRange(First->getSourceRange());
541   report->addRange(Second->getSourceRange());
542 
543   C.emitReport(report);
544 }
545 
checkAdditionOverflow(CheckerContext & C,ProgramStateRef state,NonLoc left,NonLoc right) const546 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
547                                                      ProgramStateRef state,
548                                                      NonLoc left,
549                                                      NonLoc right) const {
550   // If out-of-bounds checking is turned off, skip the rest.
551   if (!Filter.CheckCStringOutOfBounds)
552     return state;
553 
554   // If a previous check has failed, propagate the failure.
555   if (!state)
556     return nullptr;
557 
558   SValBuilder &svalBuilder = C.getSValBuilder();
559   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
560 
561   QualType sizeTy = svalBuilder.getContext().getSizeType();
562   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
563   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
564 
565   SVal maxMinusRight;
566   if (right.getAs<nonloc::ConcreteInt>()) {
567     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
568                                                  sizeTy);
569   } else {
570     // Try switching the operands. (The order of these two assignments is
571     // important!)
572     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
573                                             sizeTy);
574     left = right;
575   }
576 
577   if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
578     QualType cmpTy = svalBuilder.getConditionType();
579     // If left > max - right, we have an overflow.
580     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
581                                                 *maxMinusRightNL, cmpTy);
582 
583     ProgramStateRef stateOverflow, stateOkay;
584     std::tie(stateOverflow, stateOkay) =
585       state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
586 
587     if (stateOverflow && !stateOkay) {
588       // We have an overflow. Emit a bug report.
589       ExplodedNode *N = C.generateSink(stateOverflow);
590       if (!N)
591         return nullptr;
592 
593       if (!BT_AdditionOverflow)
594         BT_AdditionOverflow.reset(
595             new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
596                            "Sum of expressions causes overflow"));
597 
598       // This isn't a great error message, but this should never occur in real
599       // code anyway -- you'd have to create a buffer longer than a size_t can
600       // represent, which is sort of a contradiction.
601       const char *warning =
602         "This expression will create a string whose length is too big to "
603         "be represented as a size_t";
604 
605       // Generate a report for this bug.
606       BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N);
607       C.emitReport(report);
608 
609       return nullptr;
610     }
611 
612     // From now on, assume an overflow didn't occur.
613     assert(stateOkay);
614     state = stateOkay;
615   }
616 
617   return state;
618 }
619 
setCStringLength(ProgramStateRef state,const MemRegion * MR,SVal strLength)620 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
621                                                 const MemRegion *MR,
622                                                 SVal strLength) {
623   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
624 
625   MR = MR->StripCasts();
626 
627   switch (MR->getKind()) {
628   case MemRegion::StringRegionKind:
629     // FIXME: This can happen if we strcpy() into a string region. This is
630     // undefined [C99 6.4.5p6], but we should still warn about it.
631     return state;
632 
633   case MemRegion::SymbolicRegionKind:
634   case MemRegion::AllocaRegionKind:
635   case MemRegion::VarRegionKind:
636   case MemRegion::FieldRegionKind:
637   case MemRegion::ObjCIvarRegionKind:
638     // These are the types we can currently track string lengths for.
639     break;
640 
641   case MemRegion::ElementRegionKind:
642     // FIXME: Handle element regions by upper-bounding the parent region's
643     // string length.
644     return state;
645 
646   default:
647     // Other regions (mostly non-data) can't have a reliable C string length.
648     // For now, just ignore the change.
649     // FIXME: These are rare but not impossible. We should output some kind of
650     // warning for things like strcpy((char[]){'a', 0}, "b");
651     return state;
652   }
653 
654   if (strLength.isUnknown())
655     return state->remove<CStringLength>(MR);
656 
657   return state->set<CStringLength>(MR, strLength);
658 }
659 
getCStringLengthForRegion(CheckerContext & C,ProgramStateRef & state,const Expr * Ex,const MemRegion * MR,bool hypothetical)660 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
661                                                ProgramStateRef &state,
662                                                const Expr *Ex,
663                                                const MemRegion *MR,
664                                                bool hypothetical) {
665   if (!hypothetical) {
666     // If there's a recorded length, go ahead and return it.
667     const SVal *Recorded = state->get<CStringLength>(MR);
668     if (Recorded)
669       return *Recorded;
670   }
671 
672   // Otherwise, get a new symbol and update the state.
673   SValBuilder &svalBuilder = C.getSValBuilder();
674   QualType sizeTy = svalBuilder.getContext().getSizeType();
675   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
676                                                     MR, Ex, sizeTy,
677                                                     C.blockCount());
678 
679   if (!hypothetical) {
680     if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
681       // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
682       BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
683       const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
684       llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
685       const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
686                                                         fourInt);
687       NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
688       SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
689                                                 maxLength, sizeTy);
690       state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
691     }
692     state = state->set<CStringLength>(MR, strLength);
693   }
694 
695   return strLength;
696 }
697 
getCStringLength(CheckerContext & C,ProgramStateRef & state,const Expr * Ex,SVal Buf,bool hypothetical) const698 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
699                                       const Expr *Ex, SVal Buf,
700                                       bool hypothetical) const {
701   const MemRegion *MR = Buf.getAsRegion();
702   if (!MR) {
703     // If we can't get a region, see if it's something we /know/ isn't a
704     // C string. In the context of locations, the only time we can issue such
705     // a warning is for labels.
706     if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
707       if (!Filter.CheckCStringNotNullTerm)
708         return UndefinedVal();
709 
710       if (ExplodedNode *N = C.addTransition(state)) {
711         if (!BT_NotCString)
712           BT_NotCString.reset(new BuiltinBug(
713               Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
714               "Argument is not a null-terminated string."));
715 
716         SmallString<120> buf;
717         llvm::raw_svector_ostream os(buf);
718         assert(CurrentFunctionDescription);
719         os << "Argument to " << CurrentFunctionDescription
720            << " is the address of the label '" << Label->getLabel()->getName()
721            << "', which is not a null-terminated string";
722 
723         // Generate a report for this bug.
724         BugReport *report = new BugReport(*BT_NotCString, os.str(), N);
725 
726         report->addRange(Ex->getSourceRange());
727         C.emitReport(report);
728       }
729       return UndefinedVal();
730 
731     }
732 
733     // If it's not a region and not a label, give up.
734     return UnknownVal();
735   }
736 
737   // If we have a region, strip casts from it and see if we can figure out
738   // its length. For anything we can't figure out, just return UnknownVal.
739   MR = MR->StripCasts();
740 
741   switch (MR->getKind()) {
742   case MemRegion::StringRegionKind: {
743     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
744     // so we can assume that the byte length is the correct C string length.
745     SValBuilder &svalBuilder = C.getSValBuilder();
746     QualType sizeTy = svalBuilder.getContext().getSizeType();
747     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
748     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
749   }
750   case MemRegion::SymbolicRegionKind:
751   case MemRegion::AllocaRegionKind:
752   case MemRegion::VarRegionKind:
753   case MemRegion::FieldRegionKind:
754   case MemRegion::ObjCIvarRegionKind:
755     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
756   case MemRegion::CompoundLiteralRegionKind:
757     // FIXME: Can we track this? Is it necessary?
758     return UnknownVal();
759   case MemRegion::ElementRegionKind:
760     // FIXME: How can we handle this? It's not good enough to subtract the
761     // offset from the base string length; consider "123\x00567" and &a[5].
762     return UnknownVal();
763   default:
764     // Other regions (mostly non-data) can't have a reliable C string length.
765     // In this case, an error is emitted and UndefinedVal is returned.
766     // The caller should always be prepared to handle this case.
767     if (!Filter.CheckCStringNotNullTerm)
768       return UndefinedVal();
769 
770     if (ExplodedNode *N = C.addTransition(state)) {
771       if (!BT_NotCString)
772         BT_NotCString.reset(new BuiltinBug(
773             Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
774             "Argument is not a null-terminated string."));
775 
776       SmallString<120> buf;
777       llvm::raw_svector_ostream os(buf);
778 
779       assert(CurrentFunctionDescription);
780       os << "Argument to " << CurrentFunctionDescription << " is ";
781 
782       if (SummarizeRegion(os, C.getASTContext(), MR))
783         os << ", which is not a null-terminated string";
784       else
785         os << "not a null-terminated string";
786 
787       // Generate a report for this bug.
788       BugReport *report = new BugReport(*BT_NotCString,
789                                                         os.str(), N);
790 
791       report->addRange(Ex->getSourceRange());
792       C.emitReport(report);
793     }
794 
795     return UndefinedVal();
796   }
797 }
798 
getCStringLiteral(CheckerContext & C,ProgramStateRef & state,const Expr * expr,SVal val) const799 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
800   ProgramStateRef &state, const Expr *expr, SVal val) const {
801 
802   // Get the memory region pointed to by the val.
803   const MemRegion *bufRegion = val.getAsRegion();
804   if (!bufRegion)
805     return nullptr;
806 
807   // Strip casts off the memory region.
808   bufRegion = bufRegion->StripCasts();
809 
810   // Cast the memory region to a string region.
811   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
812   if (!strRegion)
813     return nullptr;
814 
815   // Return the actual string in the string region.
816   return strRegion->getStringLiteral();
817 }
818 
InvalidateBuffer(CheckerContext & C,ProgramStateRef state,const Expr * E,SVal V,bool IsSourceBuffer)819 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
820                                                  ProgramStateRef state,
821                                                  const Expr *E, SVal V,
822                                                  bool IsSourceBuffer) {
823   Optional<Loc> L = V.getAs<Loc>();
824   if (!L)
825     return state;
826 
827   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
828   // some assumptions about the value that CFRefCount can't. Even so, it should
829   // probably be refactored.
830   if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
831     const MemRegion *R = MR->getRegion()->StripCasts();
832 
833     // Are we dealing with an ElementRegion?  If so, we should be invalidating
834     // the super-region.
835     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
836       R = ER->getSuperRegion();
837       // FIXME: What about layers of ElementRegions?
838     }
839 
840     // Invalidate this region.
841     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
842 
843     bool CausesPointerEscape = false;
844     RegionAndSymbolInvalidationTraits ITraits;
845     // Invalidate and escape only indirect regions accessible through the source
846     // buffer.
847     if (IsSourceBuffer) {
848       ITraits.setTrait(R,
849                        RegionAndSymbolInvalidationTraits::TK_PreserveContents);
850       ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
851       CausesPointerEscape = true;
852     }
853 
854     return state->invalidateRegions(R, E, C.blockCount(), LCtx,
855                                     CausesPointerEscape, nullptr, nullptr,
856                                     &ITraits);
857   }
858 
859   // If we have a non-region value by chance, just remove the binding.
860   // FIXME: is this necessary or correct? This handles the non-Region
861   //  cases.  Is it ever valid to store to these?
862   return state->killBinding(*L);
863 }
864 
SummarizeRegion(raw_ostream & os,ASTContext & Ctx,const MemRegion * MR)865 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
866                                      const MemRegion *MR) {
867   const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
868 
869   switch (MR->getKind()) {
870   case MemRegion::FunctionTextRegionKind: {
871     const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
872     if (FD)
873       os << "the address of the function '" << *FD << '\'';
874     else
875       os << "the address of a function";
876     return true;
877   }
878   case MemRegion::BlockTextRegionKind:
879     os << "block text";
880     return true;
881   case MemRegion::BlockDataRegionKind:
882     os << "a block";
883     return true;
884   case MemRegion::CXXThisRegionKind:
885   case MemRegion::CXXTempObjectRegionKind:
886     os << "a C++ temp object of type " << TVR->getValueType().getAsString();
887     return true;
888   case MemRegion::VarRegionKind:
889     os << "a variable of type" << TVR->getValueType().getAsString();
890     return true;
891   case MemRegion::FieldRegionKind:
892     os << "a field of type " << TVR->getValueType().getAsString();
893     return true;
894   case MemRegion::ObjCIvarRegionKind:
895     os << "an instance variable of type " << TVR->getValueType().getAsString();
896     return true;
897   default:
898     return false;
899   }
900 }
901 
902 //===----------------------------------------------------------------------===//
903 // evaluation of individual function calls.
904 //===----------------------------------------------------------------------===//
905 
evalCopyCommon(CheckerContext & C,const CallExpr * CE,ProgramStateRef state,const Expr * Size,const Expr * Dest,const Expr * Source,bool Restricted,bool IsMempcpy) const906 void CStringChecker::evalCopyCommon(CheckerContext &C,
907                                     const CallExpr *CE,
908                                     ProgramStateRef state,
909                                     const Expr *Size, const Expr *Dest,
910                                     const Expr *Source, bool Restricted,
911                                     bool IsMempcpy) const {
912   CurrentFunctionDescription = "memory copy function";
913 
914   // See if the size argument is zero.
915   const LocationContext *LCtx = C.getLocationContext();
916   SVal sizeVal = state->getSVal(Size, LCtx);
917   QualType sizeTy = Size->getType();
918 
919   ProgramStateRef stateZeroSize, stateNonZeroSize;
920   std::tie(stateZeroSize, stateNonZeroSize) =
921     assumeZero(C, state, sizeVal, sizeTy);
922 
923   // Get the value of the Dest.
924   SVal destVal = state->getSVal(Dest, LCtx);
925 
926   // If the size is zero, there won't be any actual memory access, so
927   // just bind the return value to the destination buffer and return.
928   if (stateZeroSize && !stateNonZeroSize) {
929     stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
930     C.addTransition(stateZeroSize);
931     return;
932   }
933 
934   // If the size can be nonzero, we have to check the other arguments.
935   if (stateNonZeroSize) {
936     state = stateNonZeroSize;
937 
938     // Ensure the destination is not null. If it is NULL there will be a
939     // NULL pointer dereference.
940     state = checkNonNull(C, state, Dest, destVal);
941     if (!state)
942       return;
943 
944     // Get the value of the Src.
945     SVal srcVal = state->getSVal(Source, LCtx);
946 
947     // Ensure the source is not null. If it is NULL there will be a
948     // NULL pointer dereference.
949     state = checkNonNull(C, state, Source, srcVal);
950     if (!state)
951       return;
952 
953     // Ensure the accesses are valid and that the buffers do not overlap.
954     const char * const writeWarning =
955       "Memory copy function overflows destination buffer";
956     state = CheckBufferAccess(C, state, Size, Dest, Source,
957                               writeWarning, /* sourceWarning = */ nullptr);
958     if (Restricted)
959       state = CheckOverlap(C, state, Size, Dest, Source);
960 
961     if (!state)
962       return;
963 
964     // If this is mempcpy, get the byte after the last byte copied and
965     // bind the expr.
966     if (IsMempcpy) {
967       loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>();
968 
969       // Get the length to copy.
970       if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) {
971         // Get the byte after the last byte copied.
972         SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
973                                                           destRegVal,
974                                                           *lenValNonLoc,
975                                                           Dest->getType());
976 
977         // The byte after the last byte copied is the return value.
978         state = state->BindExpr(CE, LCtx, lastElement);
979       } else {
980         // If we don't know how much we copied, we can at least
981         // conjure a return value for later.
982         SVal result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
983                                                           C.blockCount());
984         state = state->BindExpr(CE, LCtx, result);
985       }
986 
987     } else {
988       // All other copies return the destination buffer.
989       // (Well, bcopy() has a void return type, but this won't hurt.)
990       state = state->BindExpr(CE, LCtx, destVal);
991     }
992 
993     // Invalidate the destination (regular invalidation without pointer-escaping
994     // the address of the top-level region).
995     // FIXME: Even if we can't perfectly model the copy, we should see if we
996     // can use LazyCompoundVals to copy the source values into the destination.
997     // This would probably remove any existing bindings past the end of the
998     // copied region, but that's still an improvement over blank invalidation.
999     state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest),
1000                              /*IsSourceBuffer*/false);
1001 
1002     // Invalidate the source (const-invalidation without const-pointer-escaping
1003     // the address of the top-level region).
1004     state = InvalidateBuffer(C, state, Source, C.getSVal(Source),
1005                              /*IsSourceBuffer*/true);
1006 
1007     C.addTransition(state);
1008   }
1009 }
1010 
1011 
evalMemcpy(CheckerContext & C,const CallExpr * CE) const1012 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
1013   if (CE->getNumArgs() < 3)
1014     return;
1015 
1016   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1017   // The return value is the address of the destination buffer.
1018   const Expr *Dest = CE->getArg(0);
1019   ProgramStateRef state = C.getState();
1020 
1021   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
1022 }
1023 
evalMempcpy(CheckerContext & C,const CallExpr * CE) const1024 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
1025   if (CE->getNumArgs() < 3)
1026     return;
1027 
1028   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1029   // The return value is a pointer to the byte following the last written byte.
1030   const Expr *Dest = CE->getArg(0);
1031   ProgramStateRef state = C.getState();
1032 
1033   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
1034 }
1035 
evalMemmove(CheckerContext & C,const CallExpr * CE) const1036 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
1037   if (CE->getNumArgs() < 3)
1038     return;
1039 
1040   // void *memmove(void *dst, const void *src, size_t n);
1041   // The return value is the address of the destination buffer.
1042   const Expr *Dest = CE->getArg(0);
1043   ProgramStateRef state = C.getState();
1044 
1045   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1046 }
1047 
evalBcopy(CheckerContext & C,const CallExpr * CE) const1048 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1049   if (CE->getNumArgs() < 3)
1050     return;
1051 
1052   // void bcopy(const void *src, void *dst, size_t n);
1053   evalCopyCommon(C, CE, C.getState(),
1054                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
1055 }
1056 
evalMemcmp(CheckerContext & C,const CallExpr * CE) const1057 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1058   if (CE->getNumArgs() < 3)
1059     return;
1060 
1061   // int memcmp(const void *s1, const void *s2, size_t n);
1062   CurrentFunctionDescription = "memory comparison function";
1063 
1064   const Expr *Left = CE->getArg(0);
1065   const Expr *Right = CE->getArg(1);
1066   const Expr *Size = CE->getArg(2);
1067 
1068   ProgramStateRef state = C.getState();
1069   SValBuilder &svalBuilder = C.getSValBuilder();
1070 
1071   // See if the size argument is zero.
1072   const LocationContext *LCtx = C.getLocationContext();
1073   SVal sizeVal = state->getSVal(Size, LCtx);
1074   QualType sizeTy = Size->getType();
1075 
1076   ProgramStateRef stateZeroSize, stateNonZeroSize;
1077   std::tie(stateZeroSize, stateNonZeroSize) =
1078     assumeZero(C, state, sizeVal, sizeTy);
1079 
1080   // If the size can be zero, the result will be 0 in that case, and we don't
1081   // have to check either of the buffers.
1082   if (stateZeroSize) {
1083     state = stateZeroSize;
1084     state = state->BindExpr(CE, LCtx,
1085                             svalBuilder.makeZeroVal(CE->getType()));
1086     C.addTransition(state);
1087   }
1088 
1089   // If the size can be nonzero, we have to check the other arguments.
1090   if (stateNonZeroSize) {
1091     state = stateNonZeroSize;
1092     // If we know the two buffers are the same, we know the result is 0.
1093     // First, get the two buffers' addresses. Another checker will have already
1094     // made sure they're not undefined.
1095     DefinedOrUnknownSVal LV =
1096         state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>();
1097     DefinedOrUnknownSVal RV =
1098         state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>();
1099 
1100     // See if they are the same.
1101     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1102     ProgramStateRef StSameBuf, StNotSameBuf;
1103     std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1104 
1105     // If the two arguments might be the same buffer, we know the result is 0,
1106     // and we only need to check one size.
1107     if (StSameBuf) {
1108       state = StSameBuf;
1109       state = CheckBufferAccess(C, state, Size, Left);
1110       if (state) {
1111         state = StSameBuf->BindExpr(CE, LCtx,
1112                                     svalBuilder.makeZeroVal(CE->getType()));
1113         C.addTransition(state);
1114       }
1115     }
1116 
1117     // If the two arguments might be different buffers, we have to check the
1118     // size of both of them.
1119     if (StNotSameBuf) {
1120       state = StNotSameBuf;
1121       state = CheckBufferAccess(C, state, Size, Left, Right);
1122       if (state) {
1123         // The return value is the comparison result, which we don't know.
1124         SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1125                                                  C.blockCount());
1126         state = state->BindExpr(CE, LCtx, CmpV);
1127         C.addTransition(state);
1128       }
1129     }
1130   }
1131 }
1132 
evalstrLength(CheckerContext & C,const CallExpr * CE) const1133 void CStringChecker::evalstrLength(CheckerContext &C,
1134                                    const CallExpr *CE) const {
1135   if (CE->getNumArgs() < 1)
1136     return;
1137 
1138   // size_t strlen(const char *s);
1139   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1140 }
1141 
evalstrnLength(CheckerContext & C,const CallExpr * CE) const1142 void CStringChecker::evalstrnLength(CheckerContext &C,
1143                                     const CallExpr *CE) const {
1144   if (CE->getNumArgs() < 2)
1145     return;
1146 
1147   // size_t strnlen(const char *s, size_t maxlen);
1148   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1149 }
1150 
evalstrLengthCommon(CheckerContext & C,const CallExpr * CE,bool IsStrnlen) const1151 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1152                                          bool IsStrnlen) const {
1153   CurrentFunctionDescription = "string length function";
1154   ProgramStateRef state = C.getState();
1155   const LocationContext *LCtx = C.getLocationContext();
1156 
1157   if (IsStrnlen) {
1158     const Expr *maxlenExpr = CE->getArg(1);
1159     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1160 
1161     ProgramStateRef stateZeroSize, stateNonZeroSize;
1162     std::tie(stateZeroSize, stateNonZeroSize) =
1163       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1164 
1165     // If the size can be zero, the result will be 0 in that case, and we don't
1166     // have to check the string itself.
1167     if (stateZeroSize) {
1168       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1169       stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1170       C.addTransition(stateZeroSize);
1171     }
1172 
1173     // If the size is GUARANTEED to be zero, we're done!
1174     if (!stateNonZeroSize)
1175       return;
1176 
1177     // Otherwise, record the assumption that the size is nonzero.
1178     state = stateNonZeroSize;
1179   }
1180 
1181   // Check that the string argument is non-null.
1182   const Expr *Arg = CE->getArg(0);
1183   SVal ArgVal = state->getSVal(Arg, LCtx);
1184 
1185   state = checkNonNull(C, state, Arg, ArgVal);
1186 
1187   if (!state)
1188     return;
1189 
1190   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1191 
1192   // If the argument isn't a valid C string, there's no valid state to
1193   // transition to.
1194   if (strLength.isUndef())
1195     return;
1196 
1197   DefinedOrUnknownSVal result = UnknownVal();
1198 
1199   // If the check is for strnlen() then bind the return value to no more than
1200   // the maxlen value.
1201   if (IsStrnlen) {
1202     QualType cmpTy = C.getSValBuilder().getConditionType();
1203 
1204     // It's a little unfortunate to be getting this again,
1205     // but it's not that expensive...
1206     const Expr *maxlenExpr = CE->getArg(1);
1207     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1208 
1209     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1210     Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1211 
1212     if (strLengthNL && maxlenValNL) {
1213       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1214 
1215       // Check if the strLength is greater than the maxlen.
1216       std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1217           C.getSValBuilder()
1218               .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1219               .castAs<DefinedOrUnknownSVal>());
1220 
1221       if (stateStringTooLong && !stateStringNotTooLong) {
1222         // If the string is longer than maxlen, return maxlen.
1223         result = *maxlenValNL;
1224       } else if (stateStringNotTooLong && !stateStringTooLong) {
1225         // If the string is shorter than maxlen, return its length.
1226         result = *strLengthNL;
1227       }
1228     }
1229 
1230     if (result.isUnknown()) {
1231       // If we don't have enough information for a comparison, there's
1232       // no guarantee the full string length will actually be returned.
1233       // All we know is the return value is the min of the string length
1234       // and the limit. This is better than nothing.
1235       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1236                                                    C.blockCount());
1237       NonLoc resultNL = result.castAs<NonLoc>();
1238 
1239       if (strLengthNL) {
1240         state = state->assume(C.getSValBuilder().evalBinOpNN(
1241                                   state, BO_LE, resultNL, *strLengthNL, cmpTy)
1242                                   .castAs<DefinedOrUnknownSVal>(), true);
1243       }
1244 
1245       if (maxlenValNL) {
1246         state = state->assume(C.getSValBuilder().evalBinOpNN(
1247                                   state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1248                                   .castAs<DefinedOrUnknownSVal>(), true);
1249       }
1250     }
1251 
1252   } else {
1253     // This is a plain strlen(), not strnlen().
1254     result = strLength.castAs<DefinedOrUnknownSVal>();
1255 
1256     // If we don't know the length of the string, conjure a return
1257     // value, so it can be used in constraints, at least.
1258     if (result.isUnknown()) {
1259       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1260                                                    C.blockCount());
1261     }
1262   }
1263 
1264   // Bind the return value.
1265   assert(!result.isUnknown() && "Should have conjured a value by now");
1266   state = state->BindExpr(CE, LCtx, result);
1267   C.addTransition(state);
1268 }
1269 
evalStrcpy(CheckerContext & C,const CallExpr * CE) const1270 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1271   if (CE->getNumArgs() < 2)
1272     return;
1273 
1274   // char *strcpy(char *restrict dst, const char *restrict src);
1275   evalStrcpyCommon(C, CE,
1276                    /* returnEnd = */ false,
1277                    /* isBounded = */ false,
1278                    /* isAppending = */ false);
1279 }
1280 
evalStrncpy(CheckerContext & C,const CallExpr * CE) const1281 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1282   if (CE->getNumArgs() < 3)
1283     return;
1284 
1285   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1286   evalStrcpyCommon(C, CE,
1287                    /* returnEnd = */ false,
1288                    /* isBounded = */ true,
1289                    /* isAppending = */ false);
1290 }
1291 
evalStpcpy(CheckerContext & C,const CallExpr * CE) const1292 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1293   if (CE->getNumArgs() < 2)
1294     return;
1295 
1296   // char *stpcpy(char *restrict dst, const char *restrict src);
1297   evalStrcpyCommon(C, CE,
1298                    /* returnEnd = */ true,
1299                    /* isBounded = */ false,
1300                    /* isAppending = */ false);
1301 }
1302 
evalStrcat(CheckerContext & C,const CallExpr * CE) const1303 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1304   if (CE->getNumArgs() < 2)
1305     return;
1306 
1307   //char *strcat(char *restrict s1, const char *restrict s2);
1308   evalStrcpyCommon(C, CE,
1309                    /* returnEnd = */ false,
1310                    /* isBounded = */ false,
1311                    /* isAppending = */ true);
1312 }
1313 
evalStrncat(CheckerContext & C,const CallExpr * CE) const1314 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1315   if (CE->getNumArgs() < 3)
1316     return;
1317 
1318   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1319   evalStrcpyCommon(C, CE,
1320                    /* returnEnd = */ false,
1321                    /* isBounded = */ true,
1322                    /* isAppending = */ true);
1323 }
1324 
evalStrcpyCommon(CheckerContext & C,const CallExpr * CE,bool returnEnd,bool isBounded,bool isAppending) const1325 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1326                                       bool returnEnd, bool isBounded,
1327                                       bool isAppending) const {
1328   CurrentFunctionDescription = "string copy function";
1329   ProgramStateRef state = C.getState();
1330   const LocationContext *LCtx = C.getLocationContext();
1331 
1332   // Check that the destination is non-null.
1333   const Expr *Dst = CE->getArg(0);
1334   SVal DstVal = state->getSVal(Dst, LCtx);
1335 
1336   state = checkNonNull(C, state, Dst, DstVal);
1337   if (!state)
1338     return;
1339 
1340   // Check that the source is non-null.
1341   const Expr *srcExpr = CE->getArg(1);
1342   SVal srcVal = state->getSVal(srcExpr, LCtx);
1343   state = checkNonNull(C, state, srcExpr, srcVal);
1344   if (!state)
1345     return;
1346 
1347   // Get the string length of the source.
1348   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1349 
1350   // If the source isn't a valid C string, give up.
1351   if (strLength.isUndef())
1352     return;
1353 
1354   SValBuilder &svalBuilder = C.getSValBuilder();
1355   QualType cmpTy = svalBuilder.getConditionType();
1356   QualType sizeTy = svalBuilder.getContext().getSizeType();
1357 
1358   // These two values allow checking two kinds of errors:
1359   // - actual overflows caused by a source that doesn't fit in the destination
1360   // - potential overflows caused by a bound that could exceed the destination
1361   SVal amountCopied = UnknownVal();
1362   SVal maxLastElementIndex = UnknownVal();
1363   const char *boundWarning = nullptr;
1364 
1365   // If the function is strncpy, strncat, etc... it is bounded.
1366   if (isBounded) {
1367     // Get the max number of characters to copy.
1368     const Expr *lenExpr = CE->getArg(2);
1369     SVal lenVal = state->getSVal(lenExpr, LCtx);
1370 
1371     // Protect against misdeclared strncpy().
1372     lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1373 
1374     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1375     Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1376 
1377     // If we know both values, we might be able to figure out how much
1378     // we're copying.
1379     if (strLengthNL && lenValNL) {
1380       ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1381 
1382       // Check if the max number to copy is less than the length of the src.
1383       // If the bound is equal to the source length, strncpy won't null-
1384       // terminate the result!
1385       std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1386           svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1387               .castAs<DefinedOrUnknownSVal>());
1388 
1389       if (stateSourceTooLong && !stateSourceNotTooLong) {
1390         // Max number to copy is less than the length of the src, so the actual
1391         // strLength copied is the max number arg.
1392         state = stateSourceTooLong;
1393         amountCopied = lenVal;
1394 
1395       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1396         // The source buffer entirely fits in the bound.
1397         state = stateSourceNotTooLong;
1398         amountCopied = strLength;
1399       }
1400     }
1401 
1402     // We still want to know if the bound is known to be too large.
1403     if (lenValNL) {
1404       if (isAppending) {
1405         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1406 
1407         // Get the string length of the destination. If the destination is
1408         // memory that can't have a string length, we shouldn't be copying
1409         // into it anyway.
1410         SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1411         if (dstStrLength.isUndef())
1412           return;
1413 
1414         if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
1415           maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1416                                                         *lenValNL,
1417                                                         *dstStrLengthNL,
1418                                                         sizeTy);
1419           boundWarning = "Size argument is greater than the free space in the "
1420                          "destination buffer";
1421         }
1422 
1423       } else {
1424         // For strncpy, this is just checking that lenVal <= sizeof(dst)
1425         // (Yes, strncpy and strncat differ in how they treat termination.
1426         // strncat ALWAYS terminates, but strncpy doesn't.)
1427 
1428         // We need a special case for when the copy size is zero, in which
1429         // case strncpy will do no work at all. Our bounds check uses n-1
1430         // as the last element accessed, so n == 0 is problematic.
1431         ProgramStateRef StateZeroSize, StateNonZeroSize;
1432         std::tie(StateZeroSize, StateNonZeroSize) =
1433           assumeZero(C, state, *lenValNL, sizeTy);
1434 
1435         // If the size is known to be zero, we're done.
1436         if (StateZeroSize && !StateNonZeroSize) {
1437           StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1438           C.addTransition(StateZeroSize);
1439           return;
1440         }
1441 
1442         // Otherwise, go ahead and figure out the last element we'll touch.
1443         // We don't record the non-zero assumption here because we can't
1444         // be sure. We won't warn on a possible zero.
1445         NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1446         maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1447                                                       one, sizeTy);
1448         boundWarning = "Size argument is greater than the length of the "
1449                        "destination buffer";
1450       }
1451     }
1452 
1453     // If we couldn't pin down the copy length, at least bound it.
1454     // FIXME: We should actually run this code path for append as well, but
1455     // right now it creates problems with constraints (since we can end up
1456     // trying to pass constraints from symbol to symbol).
1457     if (amountCopied.isUnknown() && !isAppending) {
1458       // Try to get a "hypothetical" string length symbol, which we can later
1459       // set as a real value if that turns out to be the case.
1460       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1461       assert(!amountCopied.isUndef());
1462 
1463       if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
1464         if (lenValNL) {
1465           // amountCopied <= lenVal
1466           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1467                                                              *amountCopiedNL,
1468                                                              *lenValNL,
1469                                                              cmpTy);
1470           state = state->assume(
1471               copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
1472           if (!state)
1473             return;
1474         }
1475 
1476         if (strLengthNL) {
1477           // amountCopied <= strlen(source)
1478           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1479                                                            *amountCopiedNL,
1480                                                            *strLengthNL,
1481                                                            cmpTy);
1482           state = state->assume(
1483               copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
1484           if (!state)
1485             return;
1486         }
1487       }
1488     }
1489 
1490   } else {
1491     // The function isn't bounded. The amount copied should match the length
1492     // of the source buffer.
1493     amountCopied = strLength;
1494   }
1495 
1496   assert(state);
1497 
1498   // This represents the number of characters copied into the destination
1499   // buffer. (It may not actually be the strlen if the destination buffer
1500   // is not terminated.)
1501   SVal finalStrLength = UnknownVal();
1502 
1503   // If this is an appending function (strcat, strncat...) then set the
1504   // string length to strlen(src) + strlen(dst) since the buffer will
1505   // ultimately contain both.
1506   if (isAppending) {
1507     // Get the string length of the destination. If the destination is memory
1508     // that can't have a string length, we shouldn't be copying into it anyway.
1509     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1510     if (dstStrLength.isUndef())
1511       return;
1512 
1513     Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
1514     Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1515 
1516     // If we know both string lengths, we might know the final string length.
1517     if (srcStrLengthNL && dstStrLengthNL) {
1518       // Make sure the two lengths together don't overflow a size_t.
1519       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1520       if (!state)
1521         return;
1522 
1523       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1524                                                *dstStrLengthNL, sizeTy);
1525     }
1526 
1527     // If we couldn't get a single value for the final string length,
1528     // we can at least bound it by the individual lengths.
1529     if (finalStrLength.isUnknown()) {
1530       // Try to get a "hypothetical" string length symbol, which we can later
1531       // set as a real value if that turns out to be the case.
1532       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1533       assert(!finalStrLength.isUndef());
1534 
1535       if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
1536         if (srcStrLengthNL) {
1537           // finalStrLength >= srcStrLength
1538           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1539                                                         *finalStrLengthNL,
1540                                                         *srcStrLengthNL,
1541                                                         cmpTy);
1542           state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1543                                 true);
1544           if (!state)
1545             return;
1546         }
1547 
1548         if (dstStrLengthNL) {
1549           // finalStrLength >= dstStrLength
1550           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1551                                                       *finalStrLengthNL,
1552                                                       *dstStrLengthNL,
1553                                                       cmpTy);
1554           state =
1555               state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1556           if (!state)
1557             return;
1558         }
1559       }
1560     }
1561 
1562   } else {
1563     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1564     // the final string length will match the input string length.
1565     finalStrLength = amountCopied;
1566   }
1567 
1568   // The final result of the function will either be a pointer past the last
1569   // copied element, or a pointer to the start of the destination buffer.
1570   SVal Result = (returnEnd ? UnknownVal() : DstVal);
1571 
1572   assert(state);
1573 
1574   // If the destination is a MemRegion, try to check for a buffer overflow and
1575   // record the new string length.
1576   if (Optional<loc::MemRegionVal> dstRegVal =
1577           DstVal.getAs<loc::MemRegionVal>()) {
1578     QualType ptrTy = Dst->getType();
1579 
1580     // If we have an exact value on a bounded copy, use that to check for
1581     // overflows, rather than our estimate about how much is actually copied.
1582     if (boundWarning) {
1583       if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
1584         SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1585                                                       *maxLastNL, ptrTy);
1586         state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1587                               boundWarning);
1588         if (!state)
1589           return;
1590       }
1591     }
1592 
1593     // Then, if the final length is known...
1594     if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
1595       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1596                                                  *knownStrLength, ptrTy);
1597 
1598       // ...and we haven't checked the bound, we'll check the actual copy.
1599       if (!boundWarning) {
1600         const char * const warningMsg =
1601           "String copy function overflows destination buffer";
1602         state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1603         if (!state)
1604           return;
1605       }
1606 
1607       // If this is a stpcpy-style copy, the last element is the return value.
1608       if (returnEnd)
1609         Result = lastElement;
1610     }
1611 
1612     // Invalidate the destination (regular invalidation without pointer-escaping
1613     // the address of the top-level region). This must happen before we set the
1614     // C string length because invalidation will clear the length.
1615     // FIXME: Even if we can't perfectly model the copy, we should see if we
1616     // can use LazyCompoundVals to copy the source values into the destination.
1617     // This would probably remove any existing bindings past the end of the
1618     // string, but that's still an improvement over blank invalidation.
1619     state = InvalidateBuffer(C, state, Dst, *dstRegVal,
1620                              /*IsSourceBuffer*/false);
1621 
1622     // Invalidate the source (const-invalidation without const-pointer-escaping
1623     // the address of the top-level region).
1624     state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true);
1625 
1626     // Set the C string length of the destination, if we know it.
1627     if (isBounded && !isAppending) {
1628       // strncpy is annoying in that it doesn't guarantee to null-terminate
1629       // the result string. If the original string didn't fit entirely inside
1630       // the bound (including the null-terminator), we don't know how long the
1631       // result is.
1632       if (amountCopied != strLength)
1633         finalStrLength = UnknownVal();
1634     }
1635     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1636   }
1637 
1638   assert(state);
1639 
1640   // If this is a stpcpy-style copy, but we were unable to check for a buffer
1641   // overflow, we still need a result. Conjure a return value.
1642   if (returnEnd && Result.isUnknown()) {
1643     Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1644   }
1645 
1646   // Set the return value.
1647   state = state->BindExpr(CE, LCtx, Result);
1648   C.addTransition(state);
1649 }
1650 
evalStrcmp(CheckerContext & C,const CallExpr * CE) const1651 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1652   if (CE->getNumArgs() < 2)
1653     return;
1654 
1655   //int strcmp(const char *s1, const char *s2);
1656   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1657 }
1658 
evalStrncmp(CheckerContext & C,const CallExpr * CE) const1659 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1660   if (CE->getNumArgs() < 3)
1661     return;
1662 
1663   //int strncmp(const char *s1, const char *s2, size_t n);
1664   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1665 }
1666 
evalStrcasecmp(CheckerContext & C,const CallExpr * CE) const1667 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1668                                     const CallExpr *CE) const {
1669   if (CE->getNumArgs() < 2)
1670     return;
1671 
1672   //int strcasecmp(const char *s1, const char *s2);
1673   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1674 }
1675 
evalStrncasecmp(CheckerContext & C,const CallExpr * CE) const1676 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1677                                      const CallExpr *CE) const {
1678   if (CE->getNumArgs() < 3)
1679     return;
1680 
1681   //int strncasecmp(const char *s1, const char *s2, size_t n);
1682   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1683 }
1684 
evalStrcmpCommon(CheckerContext & C,const CallExpr * CE,bool isBounded,bool ignoreCase) const1685 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1686                                       bool isBounded, bool ignoreCase) const {
1687   CurrentFunctionDescription = "string comparison function";
1688   ProgramStateRef state = C.getState();
1689   const LocationContext *LCtx = C.getLocationContext();
1690 
1691   // Check that the first string is non-null
1692   const Expr *s1 = CE->getArg(0);
1693   SVal s1Val = state->getSVal(s1, LCtx);
1694   state = checkNonNull(C, state, s1, s1Val);
1695   if (!state)
1696     return;
1697 
1698   // Check that the second string is non-null.
1699   const Expr *s2 = CE->getArg(1);
1700   SVal s2Val = state->getSVal(s2, LCtx);
1701   state = checkNonNull(C, state, s2, s2Val);
1702   if (!state)
1703     return;
1704 
1705   // Get the string length of the first string or give up.
1706   SVal s1Length = getCStringLength(C, state, s1, s1Val);
1707   if (s1Length.isUndef())
1708     return;
1709 
1710   // Get the string length of the second string or give up.
1711   SVal s2Length = getCStringLength(C, state, s2, s2Val);
1712   if (s2Length.isUndef())
1713     return;
1714 
1715   // If we know the two buffers are the same, we know the result is 0.
1716   // First, get the two buffers' addresses. Another checker will have already
1717   // made sure they're not undefined.
1718   DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>();
1719   DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>();
1720 
1721   // See if they are the same.
1722   SValBuilder &svalBuilder = C.getSValBuilder();
1723   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1724   ProgramStateRef StSameBuf, StNotSameBuf;
1725   std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1726 
1727   // If the two arguments might be the same buffer, we know the result is 0,
1728   // and we only need to check one size.
1729   if (StSameBuf) {
1730     StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1731                                     svalBuilder.makeZeroVal(CE->getType()));
1732     C.addTransition(StSameBuf);
1733 
1734     // If the two arguments are GUARANTEED to be the same, we're done!
1735     if (!StNotSameBuf)
1736       return;
1737   }
1738 
1739   assert(StNotSameBuf);
1740   state = StNotSameBuf;
1741 
1742   // At this point we can go about comparing the two buffers.
1743   // For now, we only do this if they're both known string literals.
1744 
1745   // Attempt to extract string literals from both expressions.
1746   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1747   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1748   bool canComputeResult = false;
1749 
1750   if (s1StrLiteral && s2StrLiteral) {
1751     StringRef s1StrRef = s1StrLiteral->getString();
1752     StringRef s2StrRef = s2StrLiteral->getString();
1753 
1754     if (isBounded) {
1755       // Get the max number of characters to compare.
1756       const Expr *lenExpr = CE->getArg(2);
1757       SVal lenVal = state->getSVal(lenExpr, LCtx);
1758 
1759       // If the length is known, we can get the right substrings.
1760       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1761         // Create substrings of each to compare the prefix.
1762         s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1763         s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1764         canComputeResult = true;
1765       }
1766     } else {
1767       // This is a normal, unbounded strcmp.
1768       canComputeResult = true;
1769     }
1770 
1771     if (canComputeResult) {
1772       // Real strcmp stops at null characters.
1773       size_t s1Term = s1StrRef.find('\0');
1774       if (s1Term != StringRef::npos)
1775         s1StrRef = s1StrRef.substr(0, s1Term);
1776 
1777       size_t s2Term = s2StrRef.find('\0');
1778       if (s2Term != StringRef::npos)
1779         s2StrRef = s2StrRef.substr(0, s2Term);
1780 
1781       // Use StringRef's comparison methods to compute the actual result.
1782       int result;
1783 
1784       if (ignoreCase) {
1785         // Compare string 1 to string 2 the same way strcasecmp() does.
1786         result = s1StrRef.compare_lower(s2StrRef);
1787       } else {
1788         // Compare string 1 to string 2 the same way strcmp() does.
1789         result = s1StrRef.compare(s2StrRef);
1790       }
1791 
1792       // Build the SVal of the comparison and bind the return value.
1793       SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1794       state = state->BindExpr(CE, LCtx, resultVal);
1795     }
1796   }
1797 
1798   if (!canComputeResult) {
1799     // Conjure a symbolic value. It's the best we can do.
1800     SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
1801                                                   C.blockCount());
1802     state = state->BindExpr(CE, LCtx, resultVal);
1803   }
1804 
1805   // Record this as a possible path.
1806   C.addTransition(state);
1807 }
1808 
evalStrsep(CheckerContext & C,const CallExpr * CE) const1809 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
1810   //char *strsep(char **stringp, const char *delim);
1811   if (CE->getNumArgs() < 2)
1812     return;
1813 
1814   // Sanity: does the search string parameter match the return type?
1815   const Expr *SearchStrPtr = CE->getArg(0);
1816   QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
1817   if (CharPtrTy.isNull() ||
1818       CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
1819     return;
1820 
1821   CurrentFunctionDescription = "strsep()";
1822   ProgramStateRef State = C.getState();
1823   const LocationContext *LCtx = C.getLocationContext();
1824 
1825   // Check that the search string pointer is non-null (though it may point to
1826   // a null string).
1827   SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
1828   State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
1829   if (!State)
1830     return;
1831 
1832   // Check that the delimiter string is non-null.
1833   const Expr *DelimStr = CE->getArg(1);
1834   SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
1835   State = checkNonNull(C, State, DelimStr, DelimStrVal);
1836   if (!State)
1837     return;
1838 
1839   SValBuilder &SVB = C.getSValBuilder();
1840   SVal Result;
1841   if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
1842     // Get the current value of the search string pointer, as a char*.
1843     Result = State->getSVal(*SearchStrLoc, CharPtrTy);
1844 
1845     // Invalidate the search string, representing the change of one delimiter
1846     // character to NUL.
1847     State = InvalidateBuffer(C, State, SearchStrPtr, Result,
1848                              /*IsSourceBuffer*/false);
1849 
1850     // Overwrite the search string pointer. The new value is either an address
1851     // further along in the same string, or NULL if there are no more tokens.
1852     State = State->bindLoc(*SearchStrLoc,
1853                            SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy,
1854                                                 C.blockCount()));
1855   } else {
1856     assert(SearchStrVal.isUnknown());
1857     // Conjure a symbolic value. It's the best we can do.
1858     Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1859   }
1860 
1861   // Set the return value, and finish.
1862   State = State->BindExpr(CE, LCtx, Result);
1863   C.addTransition(State);
1864 }
1865 
1866 
1867 //===----------------------------------------------------------------------===//
1868 // The driver method, and other Checker callbacks.
1869 //===----------------------------------------------------------------------===//
1870 
evalCall(const CallExpr * CE,CheckerContext & C) const1871 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1872   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1873 
1874   if (!FDecl)
1875     return false;
1876 
1877   // FIXME: Poorly-factored string switches are slow.
1878   FnCheck evalFunction = nullptr;
1879   if (C.isCLibraryFunction(FDecl, "memcpy"))
1880     evalFunction =  &CStringChecker::evalMemcpy;
1881   else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1882     evalFunction =  &CStringChecker::evalMempcpy;
1883   else if (C.isCLibraryFunction(FDecl, "memcmp"))
1884     evalFunction =  &CStringChecker::evalMemcmp;
1885   else if (C.isCLibraryFunction(FDecl, "memmove"))
1886     evalFunction =  &CStringChecker::evalMemmove;
1887   else if (C.isCLibraryFunction(FDecl, "strcpy"))
1888     evalFunction =  &CStringChecker::evalStrcpy;
1889   else if (C.isCLibraryFunction(FDecl, "strncpy"))
1890     evalFunction =  &CStringChecker::evalStrncpy;
1891   else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1892     evalFunction =  &CStringChecker::evalStpcpy;
1893   else if (C.isCLibraryFunction(FDecl, "strcat"))
1894     evalFunction =  &CStringChecker::evalStrcat;
1895   else if (C.isCLibraryFunction(FDecl, "strncat"))
1896     evalFunction =  &CStringChecker::evalStrncat;
1897   else if (C.isCLibraryFunction(FDecl, "strlen"))
1898     evalFunction =  &CStringChecker::evalstrLength;
1899   else if (C.isCLibraryFunction(FDecl, "strnlen"))
1900     evalFunction =  &CStringChecker::evalstrnLength;
1901   else if (C.isCLibraryFunction(FDecl, "strcmp"))
1902     evalFunction =  &CStringChecker::evalStrcmp;
1903   else if (C.isCLibraryFunction(FDecl, "strncmp"))
1904     evalFunction =  &CStringChecker::evalStrncmp;
1905   else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1906     evalFunction =  &CStringChecker::evalStrcasecmp;
1907   else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1908     evalFunction =  &CStringChecker::evalStrncasecmp;
1909   else if (C.isCLibraryFunction(FDecl, "strsep"))
1910     evalFunction =  &CStringChecker::evalStrsep;
1911   else if (C.isCLibraryFunction(FDecl, "bcopy"))
1912     evalFunction =  &CStringChecker::evalBcopy;
1913   else if (C.isCLibraryFunction(FDecl, "bcmp"))
1914     evalFunction =  &CStringChecker::evalMemcmp;
1915 
1916   // If the callee isn't a string function, let another checker handle it.
1917   if (!evalFunction)
1918     return false;
1919 
1920   // Make sure each function sets its own description.
1921   // (But don't bother in a release build.)
1922   assert(!(CurrentFunctionDescription = nullptr));
1923 
1924   // Check and evaluate the call.
1925   (this->*evalFunction)(C, CE);
1926 
1927   // If the evaluate call resulted in no change, chain to the next eval call
1928   // handler.
1929   // Note, the custom CString evaluation calls assume that basic safety
1930   // properties are held. However, if the user chooses to turn off some of these
1931   // checks, we ignore the issues and leave the call evaluation to a generic
1932   // handler.
1933   if (!C.isDifferent())
1934     return false;
1935 
1936   return true;
1937 }
1938 
checkPreStmt(const DeclStmt * DS,CheckerContext & C) const1939 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1940   // Record string length for char a[] = "abc";
1941   ProgramStateRef state = C.getState();
1942 
1943   for (const auto *I : DS->decls()) {
1944     const VarDecl *D = dyn_cast<VarDecl>(I);
1945     if (!D)
1946       continue;
1947 
1948     // FIXME: Handle array fields of structs.
1949     if (!D->getType()->isArrayType())
1950       continue;
1951 
1952     const Expr *Init = D->getInit();
1953     if (!Init)
1954       continue;
1955     if (!isa<StringLiteral>(Init))
1956       continue;
1957 
1958     Loc VarLoc = state->getLValue(D, C.getLocationContext());
1959     const MemRegion *MR = VarLoc.getAsRegion();
1960     if (!MR)
1961       continue;
1962 
1963     SVal StrVal = state->getSVal(Init, C.getLocationContext());
1964     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1965     DefinedOrUnknownSVal strLength =
1966         getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
1967 
1968     state = state->set<CStringLength>(MR, strLength);
1969   }
1970 
1971   C.addTransition(state);
1972 }
1973 
wantsRegionChangeUpdate(ProgramStateRef state) const1974 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
1975   CStringLengthTy Entries = state->get<CStringLength>();
1976   return !Entries.isEmpty();
1977 }
1978 
1979 ProgramStateRef
checkRegionChanges(ProgramStateRef state,const InvalidatedSymbols *,ArrayRef<const MemRegion * > ExplicitRegions,ArrayRef<const MemRegion * > Regions,const CallEvent * Call) const1980 CStringChecker::checkRegionChanges(ProgramStateRef state,
1981                                    const InvalidatedSymbols *,
1982                                    ArrayRef<const MemRegion *> ExplicitRegions,
1983                                    ArrayRef<const MemRegion *> Regions,
1984                                    const CallEvent *Call) const {
1985   CStringLengthTy Entries = state->get<CStringLength>();
1986   if (Entries.isEmpty())
1987     return state;
1988 
1989   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1990   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1991 
1992   // First build sets for the changed regions and their super-regions.
1993   for (ArrayRef<const MemRegion *>::iterator
1994        I = Regions.begin(), E = Regions.end(); I != E; ++I) {
1995     const MemRegion *MR = *I;
1996     Invalidated.insert(MR);
1997 
1998     SuperRegions.insert(MR);
1999     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2000       MR = SR->getSuperRegion();
2001       SuperRegions.insert(MR);
2002     }
2003   }
2004 
2005   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2006 
2007   // Then loop over the entries in the current state.
2008   for (CStringLengthTy::iterator I = Entries.begin(),
2009        E = Entries.end(); I != E; ++I) {
2010     const MemRegion *MR = I.getKey();
2011 
2012     // Is this entry for a super-region of a changed region?
2013     if (SuperRegions.count(MR)) {
2014       Entries = F.remove(Entries, MR);
2015       continue;
2016     }
2017 
2018     // Is this entry for a sub-region of a changed region?
2019     const MemRegion *Super = MR;
2020     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2021       Super = SR->getSuperRegion();
2022       if (Invalidated.count(Super)) {
2023         Entries = F.remove(Entries, MR);
2024         break;
2025       }
2026     }
2027   }
2028 
2029   return state->set<CStringLength>(Entries);
2030 }
2031 
checkLiveSymbols(ProgramStateRef state,SymbolReaper & SR) const2032 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2033                                       SymbolReaper &SR) const {
2034   // Mark all symbols in our string length map as valid.
2035   CStringLengthTy Entries = state->get<CStringLength>();
2036 
2037   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2038        I != E; ++I) {
2039     SVal Len = I.getData();
2040 
2041     for (SymExpr::symbol_iterator si = Len.symbol_begin(),
2042                                   se = Len.symbol_end(); si != se; ++si)
2043       SR.markInUse(*si);
2044   }
2045 }
2046 
checkDeadSymbols(SymbolReaper & SR,CheckerContext & C) const2047 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2048                                       CheckerContext &C) const {
2049   if (!SR.hasDeadSymbols())
2050     return;
2051 
2052   ProgramStateRef state = C.getState();
2053   CStringLengthTy Entries = state->get<CStringLength>();
2054   if (Entries.isEmpty())
2055     return;
2056 
2057   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2058   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
2059        I != E; ++I) {
2060     SVal Len = I.getData();
2061     if (SymbolRef Sym = Len.getAsSymbol()) {
2062       if (SR.isDead(Sym))
2063         Entries = F.remove(Entries, I.getKey());
2064     }
2065   }
2066 
2067   state = state->set<CStringLength>(Entries);
2068   C.addTransition(state);
2069 }
2070 
2071 #define REGISTER_CHECKER(name)                                                 \
2072   void ento::register##name(CheckerManager &mgr) {                             \
2073     CStringChecker *checker = mgr.registerChecker<CStringChecker>();           \
2074     checker->Filter.Check##name = true;                                        \
2075     checker->Filter.CheckName##name = mgr.getCurrentCheckName();               \
2076   }
2077 
2078 REGISTER_CHECKER(CStringNullArg)
REGISTER_CHECKER(CStringOutOfBounds)2079 REGISTER_CHECKER(CStringOutOfBounds)
2080 REGISTER_CHECKER(CStringBufferOverlap)
2081 REGISTER_CHECKER(CStringNotNullTerm)
2082 
2083 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
2084   registerCStringNullArg(Mgr);
2085 }
2086