• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines APIs for analyzing the format strings of printf, fscanf,
11 // and friends.
12 //
// The structure of format strings for fprintf is described in C99 7.19.6.1.
//
// The structure of format strings for fscanf is described in C99 7.19.6.2.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #ifndef LLVM_CLANG_FORMAT_H
20 #define LLVM_CLANG_FORMAT_H
21 
22 #include "clang/AST/CanonicalType.h"
23 
24 namespace clang {
25 
26 //===----------------------------------------------------------------------===//
27 /// Common components of both fprintf and fscanf format strings.
28 namespace analyze_format_string {
29 
/// Represents an optional flag of a format specifier: whether it is set,
/// how it is spelled, and (once recorded) where it appears in the format
/// string.
class OptionalFlag {
public:
  /// \param Representation the spelling of the flag; it is stored as given
  /// and handed back unchanged by toString().
  OptionalFlag(const char *Representation)
      : representation(Representation), position(0), flag(false) {}

  /// Returns true if the flag has been set.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records the location of the flag. \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded location. It is an error (checked by assert) to
  /// call this before setPosition(); the pointer starts out null so that the
  /// assertion is reliable.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the spelling passed to the constructor.
  const char *toString() const { return representation; }

  // Overloaded operators so the flag can be used like a plain bool.
  operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;
  }
private:
  const char *representation;
  const char *position;
  bool flag;
};
60 
/// The length modifier of a printf/scanf conversion, together with the
/// location at which it starts.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll', 'q' (BSD, deprecated)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsLongDouble, // 'L'
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Creates an absent modifier (kind None, null start).
  LengthModifier() : Position(0), kind(None) {}

  /// Creates a modifier of kind \p k starting at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  /// Returns the start location given at construction.
  const char *getStart() const { return Position; }

  /// Returns the number of characters attributed to the modifier: 0 for
  /// None, 2 for AsChar and AsLongLong, 1 for every other kind.
  /// NOTE(review): the enum lists 'q' under AsLongLong, so that
  /// one-character spelling is also reported as 2 -- confirm with callers.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  const char *toString() const;

private:
  const char *Position;
  Kind kind;
};
107 
108 class ConversionSpecifier {
109 public:
110   enum Kind {
111     InvalidSpecifier = 0,
112       // C99 conversion specifiers.
113     cArg,
114     dArg,
115     iArg,
116     IntArgBeg = cArg, IntArgEnd = iArg,
117 
118     oArg,
119     uArg,
120     xArg,
121     XArg,
122     UIntArgBeg = oArg, UIntArgEnd = XArg,
123 
124     fArg,
125     FArg,
126     eArg,
127     EArg,
128     gArg,
129     GArg,
130     aArg,
131     AArg,
132     DoubleArgBeg = fArg, DoubleArgEnd = AArg,
133 
134     sArg,
135     pArg,
136     nArg,
137     PercentArg,
138     CArg,
139     SArg,
140 
141     // ** Printf-specific **
142 
143     // Objective-C specific specifiers.
144     ObjCObjArg,  // '@'
145     ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
146 
147     // GlibC specific specifiers.
148     PrintErrno,   // 'm'
149 
150     PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
151 
152     // ** Scanf-specific **
153     ScanListArg, // '['
154     ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
155   };
156 
ConversionSpecifier(bool isPrintf)157   ConversionSpecifier(bool isPrintf)
158     : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {}
159 
ConversionSpecifier(bool isPrintf,const char * pos,Kind k)160   ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
161     : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {}
162 
getStart()163   const char *getStart() const {
164     return Position;
165   }
166 
getCharacters()167   llvm::StringRef getCharacters() const {
168     return llvm::StringRef(getStart(), getLength());
169   }
170 
consumesDataArgument()171   bool consumesDataArgument() const {
172     switch (kind) {
173       case PrintErrno:
174         assert(IsPrintf);
175       case PercentArg:
176         return false;
177       default:
178         return true;
179     }
180   }
181 
getKind()182   Kind getKind() const { return kind; }
setKind(Kind k)183   void setKind(Kind k) { kind = k; }
getLength()184   unsigned getLength() const {
185     return EndScanList ? EndScanList - Position : 1;
186   }
187 
188   const char *toString() const;
189 
isPrintfKind()190   bool isPrintfKind() const { return IsPrintf; }
191 
192 protected:
193   bool IsPrintf;
194   const char *Position;
195   const char *EndScanList;
196   Kind kind;
197 };
198 
199 class ArgTypeResult {
200 public:
201   enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
202     CStrTy, WCStrTy, WIntTy };
203 private:
204   const Kind K;
205   QualType T;
ArgTypeResult(bool)206   ArgTypeResult(bool) : K(InvalidTy) {}
207 public:
K(k)208   ArgTypeResult(Kind k = UnknownTy) : K(k) {}
ArgTypeResult(QualType t)209   ArgTypeResult(QualType t) : K(SpecificTy), T(t) {}
ArgTypeResult(CanQualType t)210   ArgTypeResult(CanQualType t) : K(SpecificTy), T(t) {}
211 
Invalid()212   static ArgTypeResult Invalid() { return ArgTypeResult(true); }
213 
isValid()214   bool isValid() const { return K != InvalidTy; }
215 
getSpecificType()216   const QualType *getSpecificType() const {
217     return K == SpecificTy ? &T : 0;
218   }
219 
220   bool matchesType(ASTContext &C, QualType argTy) const;
221 
matchesAnyObjCObjectRef()222   bool matchesAnyObjCObjectRef() const { return K == ObjCPointerTy; }
223 
224   QualType getRepresentativeType(ASTContext &C) const;
225 };
226 
227 class OptionalAmount {
228 public:
229   enum HowSpecified { NotSpecified, Constant, Arg, Invalid };
230 
OptionalAmount(HowSpecified howSpecified,unsigned amount,const char * amountStart,unsigned amountLength,bool usesPositionalArg)231   OptionalAmount(HowSpecified howSpecified,
232                  unsigned amount,
233                  const char *amountStart,
234                  unsigned amountLength,
235                  bool usesPositionalArg)
236   : start(amountStart), length(amountLength), hs(howSpecified), amt(amount),
237   UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {}
238 
239   OptionalAmount(bool valid = true)
240   : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0),
241   UsesPositionalArg(0), UsesDotPrefix(0) {}
242 
isInvalid()243   bool isInvalid() const {
244     return hs == Invalid;
245   }
246 
getHowSpecified()247   HowSpecified getHowSpecified() const { return hs; }
setHowSpecified(HowSpecified h)248   void setHowSpecified(HowSpecified h) { hs = h; }
249 
hasDataArgument()250   bool hasDataArgument() const { return hs == Arg; }
251 
getArgIndex()252   unsigned getArgIndex() const {
253     assert(hasDataArgument());
254     return amt;
255   }
256 
getConstantAmount()257   unsigned getConstantAmount() const {
258     assert(hs == Constant);
259     return amt;
260   }
261 
getStart()262   const char *getStart() const {
263       // We include the . character if it is given.
264     return start - UsesDotPrefix;
265   }
266 
getConstantLength()267   unsigned getConstantLength() const {
268     assert(hs == Constant);
269     return length + UsesDotPrefix;
270   }
271 
272   ArgTypeResult getArgType(ASTContext &Ctx) const;
273 
274   void toString(llvm::raw_ostream &os) const;
275 
usesPositionalArg()276   bool usesPositionalArg() const { return (bool) UsesPositionalArg; }
getPositionalArgIndex()277   unsigned getPositionalArgIndex() const {
278     assert(hasDataArgument());
279     return amt + 1;
280   }
281 
usesDotPrefix()282   bool usesDotPrefix() const { return UsesDotPrefix; }
setUsesDotPrefix()283   void setUsesDotPrefix() { UsesDotPrefix = true; }
284 
285 private:
286   const char *start;
287   unsigned length;
288   HowSpecified hs;
289   unsigned amt;
290   bool UsesPositionalArg : 1;
291   bool UsesDotPrefix;
292 };
293 
294 
295 class FormatSpecifier {
296 protected:
297   LengthModifier LM;
298   OptionalAmount FieldWidth;
299   ConversionSpecifier CS;
300     /// Positional arguments, an IEEE extension:
301     ///  IEEE Std 1003.1, 2004 Edition
302     ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
303   bool UsesPositionalArg;
304   unsigned argIndex;
305 public:
FormatSpecifier(bool isPrintf)306   FormatSpecifier(bool isPrintf)
307     : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
308 
setLengthModifier(LengthModifier lm)309   void setLengthModifier(LengthModifier lm) {
310     LM = lm;
311   }
312 
setUsesPositionalArg()313   void setUsesPositionalArg() { UsesPositionalArg = true; }
314 
setArgIndex(unsigned i)315   void setArgIndex(unsigned i) {
316     argIndex = i;
317   }
318 
getArgIndex()319   unsigned getArgIndex() const {
320     return argIndex;
321   }
322 
getPositionalArgIndex()323   unsigned getPositionalArgIndex() const {
324     return argIndex + 1;
325   }
326 
getLengthModifier()327   const LengthModifier &getLengthModifier() const {
328     return LM;
329   }
330 
getFieldWidth()331   const OptionalAmount &getFieldWidth() const {
332     return FieldWidth;
333   }
334 
setFieldWidth(const OptionalAmount & Amt)335   void setFieldWidth(const OptionalAmount &Amt) {
336     FieldWidth = Amt;
337   }
338 
usesPositionalArg()339   bool usesPositionalArg() const { return UsesPositionalArg; }
340 
341   bool hasValidLengthModifier() const;
342 };
343 
344 } // end analyze_format_string namespace
345 
346 //===----------------------------------------------------------------------===//
347 /// Pieces specific to fprintf format strings.
348 
349 namespace analyze_printf {
350 
351 class PrintfConversionSpecifier :
352   public analyze_format_string::ConversionSpecifier  {
353 public:
PrintfConversionSpecifier()354   PrintfConversionSpecifier()
355     : ConversionSpecifier(true, 0, InvalidSpecifier) {}
356 
PrintfConversionSpecifier(const char * pos,Kind k)357   PrintfConversionSpecifier(const char *pos, Kind k)
358     : ConversionSpecifier(true, pos, k) {}
359 
isObjCArg()360   bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
isIntArg()361   bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
isUIntArg()362   bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
isDoubleArg()363   bool isDoubleArg() const { return kind >= DoubleArgBeg &&
364                                     kind <= DoubleArgBeg; }
getLength()365   unsigned getLength() const {
366       // Conversion specifiers currently only are represented by
367       // single characters, but we be flexible.
368     return 1;
369   }
370 
classof(const analyze_format_string::ConversionSpecifier * CS)371   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
372     return CS->isPrintfKind();
373   }
374 };
375 
376 using analyze_format_string::ArgTypeResult;
377 using analyze_format_string::LengthModifier;
378 using analyze_format_string::OptionalAmount;
379 using analyze_format_string::OptionalFlag;
380 
381 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
382   OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
383   OptionalFlag IsLeftJustified; // '-'
384   OptionalFlag HasPlusPrefix; // '+'
385   OptionalFlag HasSpacePrefix; // ' '
386   OptionalFlag HasAlternativeForm; // '#'
387   OptionalFlag HasLeadingZeroes; // '0'
388   OptionalAmount Precision;
389 public:
PrintfSpecifier()390   PrintfSpecifier() :
391     FormatSpecifier(/* isPrintf = */ true),
392     HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
393     HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
394 
395   static PrintfSpecifier Parse(const char *beg, const char *end);
396 
397     // Methods for incrementally constructing the PrintfSpecifier.
setConversionSpecifier(const PrintfConversionSpecifier & cs)398   void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
399     CS = cs;
400   }
setHasThousandsGrouping(const char * position)401   void setHasThousandsGrouping(const char *position) {
402     HasThousandsGrouping = true;
403     HasThousandsGrouping.setPosition(position);
404   }
setIsLeftJustified(const char * position)405   void setIsLeftJustified(const char *position) {
406     IsLeftJustified = true;
407     IsLeftJustified.setPosition(position);
408   }
setHasPlusPrefix(const char * position)409   void setHasPlusPrefix(const char *position) {
410     HasPlusPrefix = true;
411     HasPlusPrefix.setPosition(position);
412   }
setHasSpacePrefix(const char * position)413   void setHasSpacePrefix(const char *position) {
414     HasSpacePrefix = true;
415     HasSpacePrefix.setPosition(position);
416   }
setHasAlternativeForm(const char * position)417   void setHasAlternativeForm(const char *position) {
418     HasAlternativeForm = true;
419     HasAlternativeForm.setPosition(position);
420   }
setHasLeadingZeros(const char * position)421   void setHasLeadingZeros(const char *position) {
422     HasLeadingZeroes = true;
423     HasLeadingZeroes.setPosition(position);
424   }
setUsesPositionalArg()425   void setUsesPositionalArg() { UsesPositionalArg = true; }
426 
427     // Methods for querying the format specifier.
428 
getConversionSpecifier()429   const PrintfConversionSpecifier &getConversionSpecifier() const {
430     return cast<PrintfConversionSpecifier>(CS);
431   }
432 
setPrecision(const OptionalAmount & Amt)433   void setPrecision(const OptionalAmount &Amt) {
434     Precision = Amt;
435     Precision.setUsesDotPrefix();
436   }
437 
getPrecision()438   const OptionalAmount &getPrecision() const {
439     return Precision;
440   }
441 
consumesDataArgument()442   bool consumesDataArgument() const {
443     return getConversionSpecifier().consumesDataArgument();
444   }
445 
446   /// \brief Returns the builtin type that a data argument
447   /// paired with this format specifier should have.  This method
448   /// will return null if the format specifier does not have
449   /// a matching data argument or the matching argument matches
450   /// more than one type.
451   ArgTypeResult getArgType(ASTContext &Ctx) const;
452 
hasThousandsGrouping()453   const OptionalFlag &hasThousandsGrouping() const {
454       return HasThousandsGrouping;
455   }
isLeftJustified()456   const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
hasPlusPrefix()457   const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
hasAlternativeForm()458   const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
hasLeadingZeros()459   const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
hasSpacePrefix()460   const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
usesPositionalArg()461   bool usesPositionalArg() const { return UsesPositionalArg; }
462 
463     /// Changes the specifier and length according to a QualType, retaining any
464     /// flags or options. Returns true on success, or false when a conversion
465     /// was not successful.
466   bool fixType(QualType QT);
467 
468   void toString(llvm::raw_ostream &os) const;
469 
470     // Validation methods - to check if any element results in undefined behavior
471   bool hasValidPlusPrefix() const;
472   bool hasValidAlternativeForm() const;
473   bool hasValidLeadingZeros() const;
474   bool hasValidSpacePrefix() const;
475   bool hasValidLeftJustified() const;
476   bool hasValidThousandsGroupingPrefix() const;
477 
478   bool hasValidPrecision() const;
479   bool hasValidFieldWidth() const;
480 };
481 }  // end analyze_printf namespace
482 
483 //===----------------------------------------------------------------------===//
484 /// Pieces specific to fscanf format strings.
485 
486 namespace analyze_scanf {
487 
488 class ScanfConversionSpecifier :
489     public analyze_format_string::ConversionSpecifier  {
490 public:
ScanfConversionSpecifier()491   ScanfConversionSpecifier()
492     : ConversionSpecifier(false, 0, InvalidSpecifier) {}
493 
ScanfConversionSpecifier(const char * pos,Kind k)494   ScanfConversionSpecifier(const char *pos, Kind k)
495     : ConversionSpecifier(false, pos, k) {}
496 
setEndScanList(const char * pos)497   void setEndScanList(const char *pos) { EndScanList = pos; }
498 
classof(const analyze_format_string::ConversionSpecifier * CS)499   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
500     return !CS->isPrintfKind();
501   }
502 };
503 
504 using analyze_format_string::LengthModifier;
505 using analyze_format_string::OptionalAmount;
506 using analyze_format_string::OptionalFlag;
507 
508 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
509   OptionalFlag SuppressAssignment; // '*'
510 public:
ScanfSpecifier()511   ScanfSpecifier() :
512     FormatSpecifier(/* isPrintf = */ false),
513     SuppressAssignment("*") {}
514 
setSuppressAssignment(const char * position)515   void setSuppressAssignment(const char *position) {
516     SuppressAssignment = true;
517     SuppressAssignment.setPosition(position);
518   }
519 
getSuppressAssignment()520   const OptionalFlag &getSuppressAssignment() const {
521     return SuppressAssignment;
522   }
523 
setConversionSpecifier(const ScanfConversionSpecifier & cs)524   void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
525     CS = cs;
526   }
527 
getConversionSpecifier()528   const ScanfConversionSpecifier &getConversionSpecifier() const {
529     return cast<ScanfConversionSpecifier>(CS);
530   }
531 
consumesDataArgument()532   bool consumesDataArgument() const {
533     return CS.consumesDataArgument() && !SuppressAssignment;
534   }
535 
536   static ScanfSpecifier Parse(const char *beg, const char *end);
537 };
538 
539 } // end analyze_scanf namespace
540 
541 //===----------------------------------------------------------------------===//
542 // Parsing and processing of format strings (both fprintf and fscanf).
543 
544 namespace analyze_format_string {
545 
/// Identifies which part of a format specifier a position belongs to; passed
/// to FormatStringHandler::HandleInvalidPosition().
enum PositionContext {
  FieldWidthPos = 0,
  PrecisionPos = 1
};
547 
548 class FormatStringHandler {
549 public:
FormatStringHandler()550   FormatStringHandler() {}
551   virtual ~FormatStringHandler();
552 
HandleNullChar(const char * nullCharacter)553   virtual void HandleNullChar(const char *nullCharacter) {}
554 
HandleInvalidPosition(const char * startPos,unsigned posLen,PositionContext p)555   virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
556                                      PositionContext p) {}
557 
HandleZeroPosition(const char * startPos,unsigned posLen)558   virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
559 
HandleIncompleteSpecifier(const char * startSpecifier,unsigned specifierLen)560   virtual void HandleIncompleteSpecifier(const char *startSpecifier,
561                                          unsigned specifierLen) {}
562 
563   // Printf-specific handlers.
564 
HandleInvalidPrintfConversionSpecifier(const analyze_printf::PrintfSpecifier & FS,const char * startSpecifier,unsigned specifierLen)565   virtual bool HandleInvalidPrintfConversionSpecifier(
566                                       const analyze_printf::PrintfSpecifier &FS,
567                                       const char *startSpecifier,
568                                       unsigned specifierLen) {
569     return true;
570   }
571 
HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier & FS,const char * startSpecifier,unsigned specifierLen)572   virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
573                                      const char *startSpecifier,
574                                      unsigned specifierLen) {
575     return true;
576   }
577 
578     // Scanf-specific handlers.
579 
HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier & FS,const char * startSpecifier,unsigned specifierLen)580   virtual bool HandleInvalidScanfConversionSpecifier(
581                                         const analyze_scanf::ScanfSpecifier &FS,
582                                         const char *startSpecifier,
583                                         unsigned specifierLen) {
584     return true;
585   }
586 
HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier & FS,const char * startSpecifier,unsigned specifierLen)587   virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
588                                     const char *startSpecifier,
589                                     unsigned specifierLen) {
590     return true;
591   }
592 
HandleIncompleteScanList(const char * start,const char * end)593   virtual void HandleIncompleteScanList(const char *start, const char *end) {}
594 };
595 
596 bool ParsePrintfString(FormatStringHandler &H,
597                        const char *beg, const char *end);
598 
599 bool ParseScanfString(FormatStringHandler &H,
600                        const char *beg, const char *end);
601 
602 } // end analyze_format_string namespace
603 } // end clang namespace
604 #endif
605