• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
2  //
3  //                     The LLVM Compiler Infrastructure
4  //
5  // This file is distributed under the University of Illinois Open Source
6  // License. See LICENSE.TXT for details.
7  //
8  //===----------------------------------------------------------------------===//
9  //
10  // Shared details for processing format strings of printf and scanf
11  // (and friends).
12  //
13  //===----------------------------------------------------------------------===//
14  
15  #include "FormatStringParsing.h"
16  
17  using clang::analyze_format_string::ArgTypeResult;
18  using clang::analyze_format_string::FormatStringHandler;
19  using clang::analyze_format_string::FormatSpecifier;
20  using clang::analyze_format_string::LengthModifier;
21  using clang::analyze_format_string::OptionalAmount;
22  using clang::analyze_format_string::PositionContext;
23  using clang::analyze_format_string::ConversionSpecifier;
24  using namespace clang;
25  
26  // Key function to FormatStringHandler.
~FormatStringHandler()27  FormatStringHandler::~FormatStringHandler() {}
28  
29  //===----------------------------------------------------------------------===//
30  // Functions for parsing format strings components in both printf and
31  // scanf format strings.
32  //===----------------------------------------------------------------------===//
33  
34  OptionalAmount
ParseAmount(const char * & Beg,const char * E)35  clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
36    const char *I = Beg;
37    UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38  
39    unsigned accumulator = 0;
40    bool hasDigits = false;
41  
42    for ( ; I != E; ++I) {
43      char c = *I;
44      if (c >= '0' && c <= '9') {
45        hasDigits = true;
46        accumulator = (accumulator * 10) + (c - '0');
47        continue;
48      }
49  
50      if (hasDigits)
51        return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
52            false);
53  
54      break;
55    }
56  
57    return OptionalAmount();
58  }
59  
60  OptionalAmount
ParseNonPositionAmount(const char * & Beg,const char * E,unsigned & argIndex)61  clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
62                                                       const char *E,
63                                                       unsigned &argIndex) {
64    if (*Beg == '*') {
65      ++Beg;
66      return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
67    }
68  
69    return ParseAmount(Beg, E);
70  }
71  
72  OptionalAmount
ParsePositionAmount(FormatStringHandler & H,const char * Start,const char * & Beg,const char * E,PositionContext p)73  clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
74                                                    const char *Start,
75                                                    const char *&Beg,
76                                                    const char *E,
77                                                    PositionContext p) {
78    if (*Beg == '*') {
79      const char *I = Beg + 1;
80      const OptionalAmount &Amt = ParseAmount(I, E);
81  
82      if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
83        H.HandleInvalidPosition(Beg, I - Beg, p);
84        return OptionalAmount(false);
85      }
86  
87      if (I == E) {
88        // No more characters left?
89        H.HandleIncompleteSpecifier(Start, E - Start);
90        return OptionalAmount(false);
91      }
92  
93      assert(Amt.getHowSpecified() == OptionalAmount::Constant);
94  
95      if (*I == '$') {
96        // Handle positional arguments
97  
98        // Special case: '*0$', since this is an easy mistake.
99        if (Amt.getConstantAmount() == 0) {
100          H.HandleZeroPosition(Beg, I - Beg + 1);
101          return OptionalAmount(false);
102        }
103  
104        const char *Tmp = Beg;
105        Beg = ++I;
106  
107        return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
108                              Tmp, 0, true);
109      }
110  
111      H.HandleInvalidPosition(Beg, I - Beg, p);
112      return OptionalAmount(false);
113    }
114  
115    return ParseAmount(Beg, E);
116  }
117  
118  
119  bool
ParseFieldWidth(FormatStringHandler & H,FormatSpecifier & CS,const char * Start,const char * & Beg,const char * E,unsigned * argIndex)120  clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
121                                                FormatSpecifier &CS,
122                                                const char *Start,
123                                                const char *&Beg, const char *E,
124                                                unsigned *argIndex) {
125    // FIXME: Support negative field widths.
126    if (argIndex) {
127      CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
128    }
129    else {
130      const OptionalAmount Amt =
131        ParsePositionAmount(H, Start, Beg, E,
132                            analyze_format_string::FieldWidthPos);
133  
134      if (Amt.isInvalid())
135        return true;
136      CS.setFieldWidth(Amt);
137    }
138    return false;
139  }
140  
141  bool
ParseArgPosition(FormatStringHandler & H,FormatSpecifier & FS,const char * Start,const char * & Beg,const char * E)142  clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
143                                                 FormatSpecifier &FS,
144                                                 const char *Start,
145                                                 const char *&Beg,
146                                                 const char *E) {
147    const char *I = Beg;
148  
149    const OptionalAmount &Amt = ParseAmount(I, E);
150  
151    if (I == E) {
152      // No more characters left?
153      H.HandleIncompleteSpecifier(Start, E - Start);
154      return true;
155    }
156  
157    if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
158      // Special case: '%0$', since this is an easy mistake.
159      if (Amt.getConstantAmount() == 0) {
160        H.HandleZeroPosition(Start, I - Start);
161        return true;
162      }
163  
164      FS.setArgIndex(Amt.getConstantAmount() - 1);
165      FS.setUsesPositionalArg();
166      // Update the caller's pointer if we decided to consume
167      // these characters.
168      Beg = I;
169      return false;
170    }
171  
172    return false;
173  }
174  
175  bool
ParseLengthModifier(FormatSpecifier & FS,const char * & I,const char * E)176  clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
177                                                    const char *&I,
178                                                    const char *E) {
179    LengthModifier::Kind lmKind = LengthModifier::None;
180    const char *lmPosition = I;
181    switch (*I) {
182      default:
183        return false;
184      case 'h':
185        ++I;
186        lmKind = (I != E && *I == 'h') ?
187        ++I, LengthModifier::AsChar : LengthModifier::AsShort;
188        break;
189      case 'l':
190        ++I;
191        lmKind = (I != E && *I == 'l') ?
192        ++I, LengthModifier::AsLongLong : LengthModifier::AsLong;
193        break;
194      case 'j': lmKind = LengthModifier::AsIntMax;     ++I; break;
195      case 'z': lmKind = LengthModifier::AsSizeT;      ++I; break;
196      case 't': lmKind = LengthModifier::AsPtrDiff;    ++I; break;
197      case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
198      case 'q': lmKind = LengthModifier::AsLongLong;   ++I; break;
199    }
200    LengthModifier lm(lmPosition, lmKind);
201    FS.setLengthModifier(lm);
202    return true;
203  }
204  
205  //===----------------------------------------------------------------------===//
206  // Methods on ArgTypeResult.
207  //===----------------------------------------------------------------------===//
208  
matchesType(ASTContext & C,QualType argTy) const209  bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
210    switch (K) {
211      case InvalidTy:
212        assert(false && "ArgTypeResult must be valid");
213        return true;
214  
215      case UnknownTy:
216        return true;
217  
218      case SpecificTy: {
219        argTy = C.getCanonicalType(argTy).getUnqualifiedType();
220        if (T == argTy)
221          return true;
222        // Check for "compatible types".
223        if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
224          switch (BT->getKind()) {
225            default:
226              break;
227            case BuiltinType::Char_S:
228            case BuiltinType::SChar:
229              return T == C.UnsignedCharTy;
230            case BuiltinType::Char_U:
231            case BuiltinType::UChar:
232              return T == C.SignedCharTy;
233            case BuiltinType::Short:
234              return T == C.UnsignedShortTy;
235            case BuiltinType::UShort:
236              return T == C.ShortTy;
237            case BuiltinType::Int:
238              return T == C.UnsignedIntTy;
239            case BuiltinType::UInt:
240              return T == C.IntTy;
241            case BuiltinType::Long:
242              return T == C.UnsignedLongTy;
243            case BuiltinType::ULong:
244              return T == C.LongTy;
245            case BuiltinType::LongLong:
246              return T == C.UnsignedLongLongTy;
247            case BuiltinType::ULongLong:
248              return T == C.LongLongTy;
249          }
250        return false;
251      }
252  
253      case CStrTy: {
254        const PointerType *PT = argTy->getAs<PointerType>();
255        if (!PT)
256          return false;
257        QualType pointeeTy = PT->getPointeeType();
258        if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
259          switch (BT->getKind()) {
260            case BuiltinType::Void:
261            case BuiltinType::Char_U:
262            case BuiltinType::UChar:
263            case BuiltinType::Char_S:
264            case BuiltinType::SChar:
265              return true;
266            default:
267              break;
268          }
269  
270        return false;
271      }
272  
273      case WCStrTy: {
274        const PointerType *PT = argTy->getAs<PointerType>();
275        if (!PT)
276          return false;
277        QualType pointeeTy =
278          C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
279        return pointeeTy == C.getWCharType();
280      }
281  
282      case WIntTy: {
283        // Instead of doing a lookup for the definition of 'wint_t' (which
284        // is defined by the system headers) instead see if wchar_t and
285        // the argument type promote to the same type.
286        QualType PromoWChar =
287          C.getWCharType()->isPromotableIntegerType()
288            ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType();
289        QualType PromoArg =
290          argTy->isPromotableIntegerType()
291            ? C.getPromotedIntegerType(argTy) : argTy;
292  
293        PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType();
294        PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType();
295  
296        return PromoWChar == PromoArg;
297      }
298  
299      case CPointerTy:
300        return argTy->isPointerType() || argTy->isObjCObjectPointerType() ||
301          argTy->isNullPtrType();
302  
303      case ObjCPointerTy:
304        return argTy->getAs<ObjCObjectPointerType>() != NULL;
305    }
306  
307    // FIXME: Should be unreachable, but Clang is currently emitting
308    // a warning.
309    return false;
310  }
311  
getRepresentativeType(ASTContext & C) const312  QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
313    switch (K) {
314      case InvalidTy:
315        assert(false && "No representative type for Invalid ArgTypeResult");
316        // Fall-through.
317      case UnknownTy:
318        return QualType();
319      case SpecificTy:
320        return T;
321      case CStrTy:
322        return C.getPointerType(C.CharTy);
323      case WCStrTy:
324        return C.getPointerType(C.getWCharType());
325      case ObjCPointerTy:
326        return C.ObjCBuiltinIdTy;
327      case CPointerTy:
328        return C.VoidPtrTy;
329      case WIntTy: {
330        QualType WC = C.getWCharType();
331        return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC;
332      }
333    }
334  
335    // FIXME: Should be unreachable, but Clang is currently emitting
336    // a warning.
337    return QualType();
338  }
339  
340  //===----------------------------------------------------------------------===//
341  // Methods on OptionalAmount.
342  //===----------------------------------------------------------------------===//
343  
344  ArgTypeResult
getArgType(ASTContext & Ctx) const345  analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
346    return Ctx.IntTy;
347  }
348  
349  //===----------------------------------------------------------------------===//
350  // Methods on LengthModifier.
351  //===----------------------------------------------------------------------===//
352  
353  const char *
toString() const354  analyze_format_string::LengthModifier::toString() const {
355    switch (kind) {
356    case AsChar:
357      return "hh";
358    case AsShort:
359      return "h";
360    case AsLong: // or AsWideChar
361      return "l";
362    case AsLongLong:
363      return "ll";
364    case AsIntMax:
365      return "j";
366    case AsSizeT:
367      return "z";
368    case AsPtrDiff:
369      return "t";
370    case AsLongDouble:
371      return "L";
372    case None:
373      return "";
374    }
375    return NULL;
376  }
377  
378  //===----------------------------------------------------------------------===//
379  // Methods on OptionalAmount.
380  //===----------------------------------------------------------------------===//
381  
toString(llvm::raw_ostream & os) const382  void OptionalAmount::toString(llvm::raw_ostream &os) const {
383    switch (hs) {
384    case Invalid:
385    case NotSpecified:
386      return;
387    case Arg:
388      if (UsesDotPrefix)
389          os << ".";
390      if (usesPositionalArg())
391        os << "*" << getPositionalArgIndex() << "$";
392      else
393        os << "*";
394      break;
395    case Constant:
396      if (UsesDotPrefix)
397          os << ".";
398      os << amt;
399      break;
400    }
401  }
402  
403  //===----------------------------------------------------------------------===//
// Methods on FormatSpecifier.
405  //===----------------------------------------------------------------------===//
406  
hasValidLengthModifier() const407  bool FormatSpecifier::hasValidLengthModifier() const {
408    switch (LM.getKind()) {
409      case LengthModifier::None:
410        return true;
411  
412          // Handle most integer flags
413      case LengthModifier::AsChar:
414      case LengthModifier::AsShort:
415      case LengthModifier::AsLongLong:
416      case LengthModifier::AsIntMax:
417      case LengthModifier::AsSizeT:
418      case LengthModifier::AsPtrDiff:
419        switch (CS.getKind()) {
420          case ConversionSpecifier::dArg:
421          case ConversionSpecifier::iArg:
422          case ConversionSpecifier::oArg:
423          case ConversionSpecifier::uArg:
424          case ConversionSpecifier::xArg:
425          case ConversionSpecifier::XArg:
426          case ConversionSpecifier::nArg:
427            return true;
428          default:
429            return false;
430        }
431  
432          // Handle 'l' flag
433      case LengthModifier::AsLong:
434        switch (CS.getKind()) {
435          case ConversionSpecifier::dArg:
436          case ConversionSpecifier::iArg:
437          case ConversionSpecifier::oArg:
438          case ConversionSpecifier::uArg:
439          case ConversionSpecifier::xArg:
440          case ConversionSpecifier::XArg:
441          case ConversionSpecifier::aArg:
442          case ConversionSpecifier::AArg:
443          case ConversionSpecifier::fArg:
444          case ConversionSpecifier::FArg:
445          case ConversionSpecifier::eArg:
446          case ConversionSpecifier::EArg:
447          case ConversionSpecifier::gArg:
448          case ConversionSpecifier::GArg:
449          case ConversionSpecifier::nArg:
450          case ConversionSpecifier::cArg:
451          case ConversionSpecifier::sArg:
452            return true;
453          default:
454            return false;
455        }
456  
457      case LengthModifier::AsLongDouble:
458        switch (CS.getKind()) {
459          case ConversionSpecifier::aArg:
460          case ConversionSpecifier::AArg:
461          case ConversionSpecifier::fArg:
462          case ConversionSpecifier::FArg:
463          case ConversionSpecifier::eArg:
464          case ConversionSpecifier::EArg:
465          case ConversionSpecifier::gArg:
466          case ConversionSpecifier::GArg:
467            return true;
468          default:
469            return false;
470        }
471    }
472    return false;
473  }
474  
475  
476