• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //== PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Handling of format strings in printf and friends.  The structure of format
11 // strings for fprintf() is described in C99 7.19.6.1.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/Analyses/FormatString.h"
16 #include "FormatStringParsing.h"
17 
18 using clang::analyze_format_string::ArgTypeResult;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::LengthModifier;
21 using clang::analyze_format_string::OptionalAmount;
22 using clang::analyze_format_string::ConversionSpecifier;
23 using clang::analyze_printf::PrintfSpecifier;
24 
25 using namespace clang;
26 
27 typedef clang::analyze_format_string::SpecifierResult<PrintfSpecifier>
28         PrintfSpecifierResult;
29 
30 //===----------------------------------------------------------------------===//
31 // Methods for parsing format strings.
32 //===----------------------------------------------------------------------===//
33 
34 using analyze_format_string::ParseNonPositionAmount;
35 
ParsePrecision(FormatStringHandler & H,PrintfSpecifier & FS,const char * Start,const char * & Beg,const char * E,unsigned * argIndex)36 static bool ParsePrecision(FormatStringHandler &H, PrintfSpecifier &FS,
37                            const char *Start, const char *&Beg, const char *E,
38                            unsigned *argIndex) {
39   if (argIndex) {
40     FS.setPrecision(ParseNonPositionAmount(Beg, E, *argIndex));
41   }
42   else {
43     const OptionalAmount Amt = ParsePositionAmount(H, Start, Beg, E,
44                                            analyze_format_string::PrecisionPos);
45     if (Amt.isInvalid())
46       return true;
47     FS.setPrecision(Amt);
48   }
49   return false;
50 }
51 
ParsePrintfSpecifier(FormatStringHandler & H,const char * & Beg,const char * E,unsigned & argIndex)52 static PrintfSpecifierResult ParsePrintfSpecifier(FormatStringHandler &H,
53                                                   const char *&Beg,
54                                                   const char *E,
55                                                   unsigned &argIndex) {
56 
57   using namespace clang::analyze_format_string;
58   using namespace clang::analyze_printf;
59 
60   const char *I = Beg;
61   const char *Start = 0;
62   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
63 
64   // Look for a '%' character that indicates the start of a format specifier.
65   for ( ; I != E ; ++I) {
66     char c = *I;
67     if (c == '\0') {
68       // Detect spurious null characters, which are likely errors.
69       H.HandleNullChar(I);
70       return true;
71     }
72     if (c == '%') {
73       Start = I++;  // Record the start of the format specifier.
74       break;
75     }
76   }
77 
78   // No format specifier found?
79   if (!Start)
80     return false;
81 
82   if (I == E) {
83     // No more characters left?
84     H.HandleIncompleteSpecifier(Start, E - Start);
85     return true;
86   }
87 
88   PrintfSpecifier FS;
89   if (ParseArgPosition(H, FS, Start, I, E))
90     return true;
91 
92   if (I == E) {
93     // No more characters left?
94     H.HandleIncompleteSpecifier(Start, E - Start);
95     return true;
96   }
97 
98   // Look for flags (if any).
99   bool hasMore = true;
100   for ( ; I != E; ++I) {
101     switch (*I) {
102       default: hasMore = false; break;
103       case '\'':
104         // FIXME: POSIX specific.  Always accept?
105         FS.setHasThousandsGrouping(I);
106         break;
107       case '-': FS.setIsLeftJustified(I); break;
108       case '+': FS.setHasPlusPrefix(I); break;
109       case ' ': FS.setHasSpacePrefix(I); break;
110       case '#': FS.setHasAlternativeForm(I); break;
111       case '0': FS.setHasLeadingZeros(I); break;
112     }
113     if (!hasMore)
114       break;
115   }
116 
117   if (I == E) {
118     // No more characters left?
119     H.HandleIncompleteSpecifier(Start, E - Start);
120     return true;
121   }
122 
123   // Look for the field width (if any).
124   if (ParseFieldWidth(H, FS, Start, I, E,
125                       FS.usesPositionalArg() ? 0 : &argIndex))
126     return true;
127 
128   if (I == E) {
129     // No more characters left?
130     H.HandleIncompleteSpecifier(Start, E - Start);
131     return true;
132   }
133 
134   // Look for the precision (if any).
135   if (*I == '.') {
136     ++I;
137     if (I == E) {
138       H.HandleIncompleteSpecifier(Start, E - Start);
139       return true;
140     }
141 
142     if (ParsePrecision(H, FS, Start, I, E,
143                        FS.usesPositionalArg() ? 0 : &argIndex))
144       return true;
145 
146     if (I == E) {
147       // No more characters left?
148       H.HandleIncompleteSpecifier(Start, E - Start);
149       return true;
150     }
151   }
152 
153   // Look for the length modifier.
154   if (ParseLengthModifier(FS, I, E) && I == E) {
155     // No more characters left?
156     H.HandleIncompleteSpecifier(Start, E - Start);
157     return true;
158   }
159 
160   if (*I == '\0') {
161     // Detect spurious null characters, which are likely errors.
162     H.HandleNullChar(I);
163     return true;
164   }
165 
166   // Finally, look for the conversion specifier.
167   const char *conversionPosition = I++;
168   ConversionSpecifier::Kind k = ConversionSpecifier::InvalidSpecifier;
169   switch (*conversionPosition) {
170     default:
171       break;
172     // C99: 7.19.6.1 (section 8).
173     case '%': k = ConversionSpecifier::PercentArg;   break;
174     case 'A': k = ConversionSpecifier::AArg; break;
175     case 'E': k = ConversionSpecifier::EArg; break;
176     case 'F': k = ConversionSpecifier::FArg; break;
177     case 'G': k = ConversionSpecifier::GArg; break;
178     case 'X': k = ConversionSpecifier::XArg; break;
179     case 'a': k = ConversionSpecifier::aArg; break;
180     case 'c': k = ConversionSpecifier::cArg; break;
181     case 'd': k = ConversionSpecifier::dArg; break;
182     case 'e': k = ConversionSpecifier::eArg; break;
183     case 'f': k = ConversionSpecifier::fArg; break;
184     case 'g': k = ConversionSpecifier::gArg; break;
185     case 'i': k = ConversionSpecifier::iArg; break;
186     case 'n': k = ConversionSpecifier::nArg; break;
187     case 'o': k = ConversionSpecifier::oArg; break;
188     case 'p': k = ConversionSpecifier::pArg;   break;
189     case 's': k = ConversionSpecifier::sArg;      break;
190     case 'u': k = ConversionSpecifier::uArg; break;
191     case 'x': k = ConversionSpecifier::xArg; break;
192     // POSIX specific.
193     case 'C': k = ConversionSpecifier::CArg; break;
194     case 'S': k = ConversionSpecifier::SArg; break;
195     // Objective-C.
196     case '@': k = ConversionSpecifier::ObjCObjArg; break;
197     // Glibc specific.
198     case 'm': k = ConversionSpecifier::PrintErrno; break;
199   }
200   PrintfConversionSpecifier CS(conversionPosition, k);
201   FS.setConversionSpecifier(CS);
202   if (CS.consumesDataArgument() && !FS.usesPositionalArg())
203     FS.setArgIndex(argIndex++);
204 
205   if (k == ConversionSpecifier::InvalidSpecifier) {
206     // Assume the conversion takes one argument.
207     return !H.HandleInvalidPrintfConversionSpecifier(FS, Start, I - Start);
208   }
209   return PrintfSpecifierResult(Start, FS);
210 }
211 
ParsePrintfString(FormatStringHandler & H,const char * I,const char * E)212 bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
213                                                      const char *I,
214                                                      const char *E) {
215 
216   unsigned argIndex = 0;
217 
218   // Keep looking for a format specifier until we have exhausted the string.
219   while (I != E) {
220     const PrintfSpecifierResult &FSR = ParsePrintfSpecifier(H, I, E, argIndex);
221     // Did a fail-stop error of any kind occur when parsing the specifier?
222     // If so, don't do any more processing.
223     if (FSR.shouldStop())
224       return true;;
225     // Did we exhaust the string or encounter an error that
226     // we can recover from?
227     if (!FSR.hasValue())
228       continue;
229     // We have a format specifier.  Pass it to the callback.
230     if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
231                                  I - FSR.getStart()))
232       return true;
233   }
234   assert(I == E && "Format string not exhausted");
235   return false;
236 }
237 
238 //===----------------------------------------------------------------------===//
239 // Methods on ConversionSpecifier.
240 //===----------------------------------------------------------------------===//
toString() const241 const char *ConversionSpecifier::toString() const {
242   switch (kind) {
243   case dArg: return "d";
244   case iArg: return "i";
245   case oArg: return "o";
246   case uArg: return "u";
247   case xArg: return "x";
248   case XArg: return "X";
249   case fArg: return "f";
250   case FArg: return "F";
251   case eArg: return "e";
252   case EArg: return "E";
253   case gArg: return "g";
254   case GArg: return "G";
255   case aArg: return "a";
256   case AArg: return "A";
257   case cArg: return "c";
258   case sArg: return "s";
259   case pArg: return "p";
260   case nArg: return "n";
261   case PercentArg:  return "%";
262   case ScanListArg: return "[";
263   case InvalidSpecifier: return NULL;
264 
265   // MacOS X unicode extensions.
266   case CArg: return "C";
267   case SArg: return "S";
268 
269   // Objective-C specific specifiers.
270   case ObjCObjArg: return "@";
271 
272   // GlibC specific specifiers.
273   case PrintErrno: return "m";
274   }
275   return NULL;
276 }
277 
278 //===----------------------------------------------------------------------===//
279 // Methods on PrintfSpecifier.
280 //===----------------------------------------------------------------------===//
281 
getArgType(ASTContext & Ctx) const282 ArgTypeResult PrintfSpecifier::getArgType(ASTContext &Ctx) const {
283   const PrintfConversionSpecifier &CS = getConversionSpecifier();
284 
285   if (!CS.consumesDataArgument())
286     return ArgTypeResult::Invalid();
287 
288   if (CS.getKind() == ConversionSpecifier::cArg)
289     switch (LM.getKind()) {
290       case LengthModifier::None: return Ctx.IntTy;
291       case LengthModifier::AsLong: return ArgTypeResult::WIntTy;
292       default:
293         return ArgTypeResult::Invalid();
294     }
295 
296   if (CS.isIntArg())
297     switch (LM.getKind()) {
298       case LengthModifier::AsLongDouble:
299         return ArgTypeResult::Invalid();
300       case LengthModifier::None: return Ctx.IntTy;
301       case LengthModifier::AsChar: return Ctx.SignedCharTy;
302       case LengthModifier::AsShort: return Ctx.ShortTy;
303       case LengthModifier::AsLong: return Ctx.LongTy;
304       case LengthModifier::AsLongLong: return Ctx.LongLongTy;
305       case LengthModifier::AsIntMax:
306         // FIXME: Return unknown for now.
307         return ArgTypeResult();
308       case LengthModifier::AsSizeT: return Ctx.getSizeType();
309       case LengthModifier::AsPtrDiff: return Ctx.getPointerDiffType();
310     }
311 
312   if (CS.isUIntArg())
313     switch (LM.getKind()) {
314       case LengthModifier::AsLongDouble:
315         return ArgTypeResult::Invalid();
316       case LengthModifier::None: return Ctx.UnsignedIntTy;
317       case LengthModifier::AsChar: return Ctx.UnsignedCharTy;
318       case LengthModifier::AsShort: return Ctx.UnsignedShortTy;
319       case LengthModifier::AsLong: return Ctx.UnsignedLongTy;
320       case LengthModifier::AsLongLong: return Ctx.UnsignedLongLongTy;
321       case LengthModifier::AsIntMax:
322         // FIXME: Return unknown for now.
323         return ArgTypeResult();
324       case LengthModifier::AsSizeT:
325         // FIXME: How to get the corresponding unsigned
326         // version of size_t?
327         return ArgTypeResult();
328       case LengthModifier::AsPtrDiff:
329         // FIXME: How to get the corresponding unsigned
330         // version of ptrdiff_t?
331         return ArgTypeResult();
332     }
333 
334   if (CS.isDoubleArg()) {
335     if (LM.getKind() == LengthModifier::AsLongDouble)
336       return Ctx.LongDoubleTy;
337     return Ctx.DoubleTy;
338   }
339 
340   switch (CS.getKind()) {
341     case ConversionSpecifier::sArg:
342       return ArgTypeResult(LM.getKind() == LengthModifier::AsWideChar ?
343           ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy);
344     case ConversionSpecifier::SArg:
345       // FIXME: This appears to be Mac OS X specific.
346       return ArgTypeResult::WCStrTy;
347     case ConversionSpecifier::CArg:
348       return Ctx.WCharTy;
349     case ConversionSpecifier::pArg:
350       return ArgTypeResult::CPointerTy;
351     default:
352       break;
353   }
354 
355   // FIXME: Handle other cases.
356   return ArgTypeResult();
357 }
358 
fixType(QualType QT)359 bool PrintfSpecifier::fixType(QualType QT) {
360   // Handle strings first (char *, wchar_t *)
361   if (QT->isPointerType() && (QT->getPointeeType()->isAnyCharacterType())) {
362     CS.setKind(ConversionSpecifier::sArg);
363 
364     // Disable irrelevant flags
365     HasAlternativeForm = 0;
366     HasLeadingZeroes = 0;
367 
368     // Set the long length modifier for wide characters
369     if (QT->getPointeeType()->isWideCharType())
370       LM.setKind(LengthModifier::AsWideChar);
371 
372     return true;
373   }
374 
375   // We can only work with builtin types.
376   if (!QT->isBuiltinType())
377     return false;
378 
379   // Everything else should be a base type
380   const BuiltinType *BT = QT->getAs<BuiltinType>();
381 
382   // Set length modifier
383   switch (BT->getKind()) {
384   case BuiltinType::Bool:
385   case BuiltinType::WChar_U:
386   case BuiltinType::WChar_S:
387   case BuiltinType::Char16:
388   case BuiltinType::Char32:
389   case BuiltinType::UInt128:
390   case BuiltinType::Int128:
391     // Integral types which are non-trivial to correct.
392     return false;
393 
394   case BuiltinType::Void:
395   case BuiltinType::NullPtr:
396   case BuiltinType::ObjCId:
397   case BuiltinType::ObjCClass:
398   case BuiltinType::ObjCSel:
399   case BuiltinType::Dependent:
400   case BuiltinType::Overload:
401   case BuiltinType::BoundMember:
402   case BuiltinType::UnknownAny:
403     // Misc other stuff which doesn't make sense here.
404     return false;
405 
406   case BuiltinType::UInt:
407   case BuiltinType::Int:
408   case BuiltinType::Float:
409   case BuiltinType::Double:
410     LM.setKind(LengthModifier::None);
411     break;
412 
413   case BuiltinType::Char_U:
414   case BuiltinType::UChar:
415   case BuiltinType::Char_S:
416   case BuiltinType::SChar:
417     LM.setKind(LengthModifier::AsChar);
418     break;
419 
420   case BuiltinType::Short:
421   case BuiltinType::UShort:
422     LM.setKind(LengthModifier::AsShort);
423     break;
424 
425   case BuiltinType::Long:
426   case BuiltinType::ULong:
427     LM.setKind(LengthModifier::AsLong);
428     break;
429 
430   case BuiltinType::LongLong:
431   case BuiltinType::ULongLong:
432     LM.setKind(LengthModifier::AsLongLong);
433     break;
434 
435   case BuiltinType::LongDouble:
436     LM.setKind(LengthModifier::AsLongDouble);
437     break;
438   }
439 
440   // Set conversion specifier and disable any flags which do not apply to it.
441   // Let typedefs to char fall through to int, as %c is silly for uint8_t.
442   if (isa<TypedefType>(QT) && QT->isAnyCharacterType()) {
443     CS.setKind(ConversionSpecifier::cArg);
444     LM.setKind(LengthModifier::None);
445     Precision.setHowSpecified(OptionalAmount::NotSpecified);
446     HasAlternativeForm = 0;
447     HasLeadingZeroes = 0;
448     HasPlusPrefix = 0;
449   }
450   // Test for Floating type first as LongDouble can pass isUnsignedIntegerType
451   else if (QT->isRealFloatingType()) {
452     CS.setKind(ConversionSpecifier::fArg);
453   }
454   else if (QT->isSignedIntegerType()) {
455     CS.setKind(ConversionSpecifier::dArg);
456     HasAlternativeForm = 0;
457   }
458   else if (QT->isUnsignedIntegerType()) {
459     // Preserve the original formatting, e.g. 'X', 'o'.
460     if (!cast<PrintfConversionSpecifier>(CS).isUIntArg())
461       CS.setKind(ConversionSpecifier::uArg);
462     HasAlternativeForm = 0;
463     HasPlusPrefix = 0;
464   }
465   else {
466     assert(0 && "Unexpected type");
467   }
468 
469   return true;
470 }
471 
toString(llvm::raw_ostream & os) const472 void PrintfSpecifier::toString(llvm::raw_ostream &os) const {
473   // Whilst some features have no defined order, we are using the order
474   // appearing in the C99 standard (ISO/IEC 9899:1999 (E) 7.19.6.1)
475   os << "%";
476 
477   // Positional args
478   if (usesPositionalArg()) {
479     os << getPositionalArgIndex() << "$";
480   }
481 
482   // Conversion flags
483   if (IsLeftJustified)    os << "-";
484   if (HasPlusPrefix)      os << "+";
485   if (HasSpacePrefix)     os << " ";
486   if (HasAlternativeForm) os << "#";
487   if (HasLeadingZeroes)   os << "0";
488 
489   // Minimum field width
490   FieldWidth.toString(os);
491   // Precision
492   Precision.toString(os);
493   // Length modifier
494   os << LM.toString();
495   // Conversion specifier
496   os << CS.toString();
497 }
498 
hasValidPlusPrefix() const499 bool PrintfSpecifier::hasValidPlusPrefix() const {
500   if (!HasPlusPrefix)
501     return true;
502 
503   // The plus prefix only makes sense for signed conversions
504   switch (CS.getKind()) {
505   case ConversionSpecifier::dArg:
506   case ConversionSpecifier::iArg:
507   case ConversionSpecifier::fArg:
508   case ConversionSpecifier::FArg:
509   case ConversionSpecifier::eArg:
510   case ConversionSpecifier::EArg:
511   case ConversionSpecifier::gArg:
512   case ConversionSpecifier::GArg:
513   case ConversionSpecifier::aArg:
514   case ConversionSpecifier::AArg:
515     return true;
516 
517   default:
518     return false;
519   }
520 }
521 
hasValidAlternativeForm() const522 bool PrintfSpecifier::hasValidAlternativeForm() const {
523   if (!HasAlternativeForm)
524     return true;
525 
526   // Alternate form flag only valid with the oxXaAeEfFgG conversions
527   switch (CS.getKind()) {
528   case ConversionSpecifier::oArg:
529   case ConversionSpecifier::xArg:
530   case ConversionSpecifier::XArg:
531   case ConversionSpecifier::aArg:
532   case ConversionSpecifier::AArg:
533   case ConversionSpecifier::eArg:
534   case ConversionSpecifier::EArg:
535   case ConversionSpecifier::fArg:
536   case ConversionSpecifier::FArg:
537   case ConversionSpecifier::gArg:
538   case ConversionSpecifier::GArg:
539     return true;
540 
541   default:
542     return false;
543   }
544 }
545 
hasValidLeadingZeros() const546 bool PrintfSpecifier::hasValidLeadingZeros() const {
547   if (!HasLeadingZeroes)
548     return true;
549 
550   // Leading zeroes flag only valid with the diouxXaAeEfFgG conversions
551   switch (CS.getKind()) {
552   case ConversionSpecifier::dArg:
553   case ConversionSpecifier::iArg:
554   case ConversionSpecifier::oArg:
555   case ConversionSpecifier::uArg:
556   case ConversionSpecifier::xArg:
557   case ConversionSpecifier::XArg:
558   case ConversionSpecifier::aArg:
559   case ConversionSpecifier::AArg:
560   case ConversionSpecifier::eArg:
561   case ConversionSpecifier::EArg:
562   case ConversionSpecifier::fArg:
563   case ConversionSpecifier::FArg:
564   case ConversionSpecifier::gArg:
565   case ConversionSpecifier::GArg:
566     return true;
567 
568   default:
569     return false;
570   }
571 }
572 
hasValidSpacePrefix() const573 bool PrintfSpecifier::hasValidSpacePrefix() const {
574   if (!HasSpacePrefix)
575     return true;
576 
577   // The space prefix only makes sense for signed conversions
578   switch (CS.getKind()) {
579   case ConversionSpecifier::dArg:
580   case ConversionSpecifier::iArg:
581   case ConversionSpecifier::fArg:
582   case ConversionSpecifier::FArg:
583   case ConversionSpecifier::eArg:
584   case ConversionSpecifier::EArg:
585   case ConversionSpecifier::gArg:
586   case ConversionSpecifier::GArg:
587   case ConversionSpecifier::aArg:
588   case ConversionSpecifier::AArg:
589     return true;
590 
591   default:
592     return false;
593   }
594 }
595 
hasValidLeftJustified() const596 bool PrintfSpecifier::hasValidLeftJustified() const {
597   if (!IsLeftJustified)
598     return true;
599 
600   // The left justified flag is valid for all conversions except n
601   switch (CS.getKind()) {
602   case ConversionSpecifier::nArg:
603     return false;
604 
605   default:
606     return true;
607   }
608 }
609 
hasValidThousandsGroupingPrefix() const610 bool PrintfSpecifier::hasValidThousandsGroupingPrefix() const {
611   if (!HasThousandsGrouping)
612     return true;
613 
614   switch (CS.getKind()) {
615     case ConversionSpecifier::dArg:
616     case ConversionSpecifier::iArg:
617     case ConversionSpecifier::uArg:
618     case ConversionSpecifier::fArg:
619     case ConversionSpecifier::FArg:
620     case ConversionSpecifier::gArg:
621     case ConversionSpecifier::GArg:
622       return true;
623     default:
624       return false;
625   }
626 }
627 
hasValidPrecision() const628 bool PrintfSpecifier::hasValidPrecision() const {
629   if (Precision.getHowSpecified() == OptionalAmount::NotSpecified)
630     return true;
631 
632   // Precision is only valid with the diouxXaAeEfFgGs conversions
633   switch (CS.getKind()) {
634   case ConversionSpecifier::dArg:
635   case ConversionSpecifier::iArg:
636   case ConversionSpecifier::oArg:
637   case ConversionSpecifier::uArg:
638   case ConversionSpecifier::xArg:
639   case ConversionSpecifier::XArg:
640   case ConversionSpecifier::aArg:
641   case ConversionSpecifier::AArg:
642   case ConversionSpecifier::eArg:
643   case ConversionSpecifier::EArg:
644   case ConversionSpecifier::fArg:
645   case ConversionSpecifier::FArg:
646   case ConversionSpecifier::gArg:
647   case ConversionSpecifier::GArg:
648   case ConversionSpecifier::sArg:
649     return true;
650 
651   default:
652     return false;
653   }
654 }
hasValidFieldWidth() const655 bool PrintfSpecifier::hasValidFieldWidth() const {
656   if (FieldWidth.getHowSpecified() == OptionalAmount::NotSpecified)
657       return true;
658 
659   // The field width is valid for all conversions except n
660   switch (CS.getKind()) {
661   case ConversionSpecifier::nArg:
662     return false;
663 
664   default:
665     return true;
666   }
667 }
668