• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*-
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Shared details for processing format strings of printf and scanf
11 // (and friends).
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "FormatStringParsing.h"
16 #include "clang/Basic/LangOptions.h"
17 
18 using clang::analyze_format_string::ArgTypeResult;
19 using clang::analyze_format_string::FormatStringHandler;
20 using clang::analyze_format_string::FormatSpecifier;
21 using clang::analyze_format_string::LengthModifier;
22 using clang::analyze_format_string::OptionalAmount;
23 using clang::analyze_format_string::PositionContext;
24 using clang::analyze_format_string::ConversionSpecifier;
25 using namespace clang;
26 
27 // Key function to FormatStringHandler.
~FormatStringHandler()28 FormatStringHandler::~FormatStringHandler() {}
29 
30 //===----------------------------------------------------------------------===//
31 // Functions for parsing format strings components in both printf and
32 // scanf format strings.
33 //===----------------------------------------------------------------------===//
34 
35 OptionalAmount
ParseAmount(const char * & Beg,const char * E)36 clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) {
37   const char *I = Beg;
38   UpdateOnReturn <const char*> UpdateBeg(Beg, I);
39 
40   unsigned accumulator = 0;
41   bool hasDigits = false;
42 
43   for ( ; I != E; ++I) {
44     char c = *I;
45     if (c >= '0' && c <= '9') {
46       hasDigits = true;
47       accumulator = (accumulator * 10) + (c - '0');
48       continue;
49     }
50 
51     if (hasDigits)
52       return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg,
53           false);
54 
55     break;
56   }
57 
58   return OptionalAmount();
59 }
60 
61 OptionalAmount
ParseNonPositionAmount(const char * & Beg,const char * E,unsigned & argIndex)62 clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg,
63                                                      const char *E,
64                                                      unsigned &argIndex) {
65   if (*Beg == '*') {
66     ++Beg;
67     return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false);
68   }
69 
70   return ParseAmount(Beg, E);
71 }
72 
73 OptionalAmount
ParsePositionAmount(FormatStringHandler & H,const char * Start,const char * & Beg,const char * E,PositionContext p)74 clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H,
75                                                   const char *Start,
76                                                   const char *&Beg,
77                                                   const char *E,
78                                                   PositionContext p) {
79   if (*Beg == '*') {
80     const char *I = Beg + 1;
81     const OptionalAmount &Amt = ParseAmount(I, E);
82 
83     if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) {
84       H.HandleInvalidPosition(Beg, I - Beg, p);
85       return OptionalAmount(false);
86     }
87 
88     if (I == E) {
89       // No more characters left?
90       H.HandleIncompleteSpecifier(Start, E - Start);
91       return OptionalAmount(false);
92     }
93 
94     assert(Amt.getHowSpecified() == OptionalAmount::Constant);
95 
96     if (*I == '$') {
97       // Handle positional arguments
98 
99       // Special case: '*0$', since this is an easy mistake.
100       if (Amt.getConstantAmount() == 0) {
101         H.HandleZeroPosition(Beg, I - Beg + 1);
102         return OptionalAmount(false);
103       }
104 
105       const char *Tmp = Beg;
106       Beg = ++I;
107 
108       return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1,
109                             Tmp, 0, true);
110     }
111 
112     H.HandleInvalidPosition(Beg, I - Beg, p);
113     return OptionalAmount(false);
114   }
115 
116   return ParseAmount(Beg, E);
117 }
118 
119 
120 bool
ParseFieldWidth(FormatStringHandler & H,FormatSpecifier & CS,const char * Start,const char * & Beg,const char * E,unsigned * argIndex)121 clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H,
122                                               FormatSpecifier &CS,
123                                               const char *Start,
124                                               const char *&Beg, const char *E,
125                                               unsigned *argIndex) {
126   // FIXME: Support negative field widths.
127   if (argIndex) {
128     CS.setFieldWidth(ParseNonPositionAmount(Beg, E, *argIndex));
129   }
130   else {
131     const OptionalAmount Amt =
132       ParsePositionAmount(H, Start, Beg, E,
133                           analyze_format_string::FieldWidthPos);
134 
135     if (Amt.isInvalid())
136       return true;
137     CS.setFieldWidth(Amt);
138   }
139   return false;
140 }
141 
142 bool
ParseArgPosition(FormatStringHandler & H,FormatSpecifier & FS,const char * Start,const char * & Beg,const char * E)143 clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H,
144                                                FormatSpecifier &FS,
145                                                const char *Start,
146                                                const char *&Beg,
147                                                const char *E) {
148   const char *I = Beg;
149 
150   const OptionalAmount &Amt = ParseAmount(I, E);
151 
152   if (I == E) {
153     // No more characters left?
154     H.HandleIncompleteSpecifier(Start, E - Start);
155     return true;
156   }
157 
158   if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') {
159     // Warn that positional arguments are non-standard.
160     H.HandlePosition(Start, I - Start);
161 
162     // Special case: '%0$', since this is an easy mistake.
163     if (Amt.getConstantAmount() == 0) {
164       H.HandleZeroPosition(Start, I - Start);
165       return true;
166     }
167 
168     FS.setArgIndex(Amt.getConstantAmount() - 1);
169     FS.setUsesPositionalArg();
170     // Update the caller's pointer if we decided to consume
171     // these characters.
172     Beg = I;
173     return false;
174   }
175 
176   return false;
177 }
178 
179 bool
ParseLengthModifier(FormatSpecifier & FS,const char * & I,const char * E,const LangOptions & LO,bool IsScanf)180 clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS,
181                                                   const char *&I,
182                                                   const char *E,
183                                                   const LangOptions &LO,
184                                                   bool IsScanf) {
185   LengthModifier::Kind lmKind = LengthModifier::None;
186   const char *lmPosition = I;
187   switch (*I) {
188     default:
189       return false;
190     case 'h':
191       ++I;
192       lmKind = (I != E && *I == 'h') ? (++I, LengthModifier::AsChar)
193                                      : LengthModifier::AsShort;
194       break;
195     case 'l':
196       ++I;
197       lmKind = (I != E && *I == 'l') ? (++I, LengthModifier::AsLongLong)
198                                      : LengthModifier::AsLong;
199       break;
200     case 'j': lmKind = LengthModifier::AsIntMax;     ++I; break;
201     case 'z': lmKind = LengthModifier::AsSizeT;      ++I; break;
202     case 't': lmKind = LengthModifier::AsPtrDiff;    ++I; break;
203     case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break;
204     case 'q': lmKind = LengthModifier::AsQuad;       ++I; break;
205     case 'a':
206       if (IsScanf && !LO.C99 && !LO.CPlusPlus0x) {
207         // For scanf in C90, look at the next character to see if this should
208         // be parsed as the GNU extension 'a' length modifier. If not, this
209         // will be parsed as a conversion specifier.
210         ++I;
211         if (I != E && (*I == 's' || *I == 'S' || *I == '[')) {
212           lmKind = LengthModifier::AsAllocate;
213           break;
214         }
215         --I;
216       }
217       return false;
218     case 'm':
219       if (IsScanf) {
220         lmKind = LengthModifier::AsMAllocate;
221         ++I;
222         break;
223       }
224       return false;
225   }
226   LengthModifier lm(lmPosition, lmKind);
227   FS.setLengthModifier(lm);
228   return true;
229 }
230 
231 //===----------------------------------------------------------------------===//
232 // Methods on ArgTypeResult.
233 //===----------------------------------------------------------------------===//
234 
matchesType(ASTContext & C,QualType argTy) const235 bool ArgTypeResult::matchesType(ASTContext &C, QualType argTy) const {
236   switch (K) {
237     case InvalidTy:
238       llvm_unreachable("ArgTypeResult must be valid");
239 
240     case UnknownTy:
241       return true;
242 
243     case AnyCharTy: {
244       if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
245         switch (BT->getKind()) {
246           default:
247             break;
248           case BuiltinType::Char_S:
249           case BuiltinType::SChar:
250           case BuiltinType::UChar:
251           case BuiltinType::Char_U:
252             return true;
253         }
254       return false;
255     }
256 
257     case SpecificTy: {
258       argTy = C.getCanonicalType(argTy).getUnqualifiedType();
259       if (T == argTy)
260         return true;
261       // Check for "compatible types".
262       if (const BuiltinType *BT = argTy->getAs<BuiltinType>())
263         switch (BT->getKind()) {
264           default:
265             break;
266           case BuiltinType::Char_S:
267           case BuiltinType::SChar:
268             return T == C.UnsignedCharTy;
269           case BuiltinType::Char_U:
270           case BuiltinType::UChar:
271             return T == C.SignedCharTy;
272           case BuiltinType::Short:
273             return T == C.UnsignedShortTy;
274           case BuiltinType::UShort:
275             return T == C.ShortTy;
276           case BuiltinType::Int:
277             return T == C.UnsignedIntTy;
278           case BuiltinType::UInt:
279             return T == C.IntTy;
280           case BuiltinType::Long:
281             return T == C.UnsignedLongTy;
282           case BuiltinType::ULong:
283             return T == C.LongTy;
284           case BuiltinType::LongLong:
285             return T == C.UnsignedLongLongTy;
286           case BuiltinType::ULongLong:
287             return T == C.LongLongTy;
288         }
289       return false;
290     }
291 
292     case CStrTy: {
293       const PointerType *PT = argTy->getAs<PointerType>();
294       if (!PT)
295         return false;
296       QualType pointeeTy = PT->getPointeeType();
297       if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>())
298         switch (BT->getKind()) {
299           case BuiltinType::Void:
300           case BuiltinType::Char_U:
301           case BuiltinType::UChar:
302           case BuiltinType::Char_S:
303           case BuiltinType::SChar:
304             return true;
305           default:
306             break;
307         }
308 
309       return false;
310     }
311 
312     case WCStrTy: {
313       const PointerType *PT = argTy->getAs<PointerType>();
314       if (!PT)
315         return false;
316       QualType pointeeTy =
317         C.getCanonicalType(PT->getPointeeType()).getUnqualifiedType();
318       return pointeeTy == C.getWCharType();
319     }
320 
321     case WIntTy: {
322       // Instead of doing a lookup for the definition of 'wint_t' (which
323       // is defined by the system headers) instead see if wchar_t and
324       // the argument type promote to the same type.
325       QualType PromoWChar =
326         C.getWCharType()->isPromotableIntegerType()
327           ? C.getPromotedIntegerType(C.getWCharType()) : C.getWCharType();
328       QualType PromoArg =
329         argTy->isPromotableIntegerType()
330           ? C.getPromotedIntegerType(argTy) : argTy;
331 
332       PromoWChar = C.getCanonicalType(PromoWChar).getUnqualifiedType();
333       PromoArg = C.getCanonicalType(PromoArg).getUnqualifiedType();
334 
335       return PromoWChar == PromoArg;
336     }
337 
338     case CPointerTy:
339       return argTy->isPointerType() || argTy->isObjCObjectPointerType() ||
340              argTy->isBlockPointerType() || argTy->isNullPtrType();
341 
342     case ObjCPointerTy: {
343       if (argTy->getAs<ObjCObjectPointerType>() ||
344           argTy->getAs<BlockPointerType>())
345         return true;
346 
347       // Handle implicit toll-free bridging.
348       if (const PointerType *PT = argTy->getAs<PointerType>()) {
349         // Things such as CFTypeRef are really just opaque pointers
350         // to C structs representing CF types that can often be bridged
351         // to Objective-C objects.  Since the compiler doesn't know which
352         // structs can be toll-free bridged, we just accept them all.
353         QualType pointee = PT->getPointeeType();
354         if (pointee->getAsStructureType() || pointee->isVoidType())
355           return true;
356       }
357       return false;
358     }
359   }
360 
361   llvm_unreachable("Invalid ArgTypeResult Kind!");
362 }
363 
getRepresentativeType(ASTContext & C) const364 QualType ArgTypeResult::getRepresentativeType(ASTContext &C) const {
365   switch (K) {
366     case InvalidTy:
367       llvm_unreachable("No representative type for Invalid ArgTypeResult");
368     case UnknownTy:
369       return QualType();
370     case AnyCharTy:
371       return C.CharTy;
372     case SpecificTy:
373       return T;
374     case CStrTy:
375       return C.getPointerType(C.CharTy);
376     case WCStrTy:
377       return C.getPointerType(C.getWCharType());
378     case ObjCPointerTy:
379       return C.ObjCBuiltinIdTy;
380     case CPointerTy:
381       return C.VoidPtrTy;
382     case WIntTy: {
383       QualType WC = C.getWCharType();
384       return WC->isPromotableIntegerType() ? C.getPromotedIntegerType(WC) : WC;
385     }
386   }
387 
388   llvm_unreachable("Invalid ArgTypeResult Kind!");
389 }
390 
getRepresentativeTypeName(ASTContext & C) const391 std::string ArgTypeResult::getRepresentativeTypeName(ASTContext &C) const {
392   std::string S = getRepresentativeType(C).getAsString();
393   if (Name && S != Name)
394     return std::string("'") + Name + "' (aka '" + S + "')";
395   return std::string("'") + S + "'";
396 }
397 
398 
399 //===----------------------------------------------------------------------===//
400 // Methods on OptionalAmount.
401 //===----------------------------------------------------------------------===//
402 
403 ArgTypeResult
getArgType(ASTContext & Ctx) const404 analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const {
405   return Ctx.IntTy;
406 }
407 
408 //===----------------------------------------------------------------------===//
409 // Methods on LengthModifier.
410 //===----------------------------------------------------------------------===//
411 
412 const char *
toString() const413 analyze_format_string::LengthModifier::toString() const {
414   switch (kind) {
415   case AsChar:
416     return "hh";
417   case AsShort:
418     return "h";
419   case AsLong: // or AsWideChar
420     return "l";
421   case AsLongLong:
422     return "ll";
423   case AsQuad:
424     return "q";
425   case AsIntMax:
426     return "j";
427   case AsSizeT:
428     return "z";
429   case AsPtrDiff:
430     return "t";
431   case AsLongDouble:
432     return "L";
433   case AsAllocate:
434     return "a";
435   case AsMAllocate:
436     return "m";
437   case None:
438     return "";
439   }
440   return NULL;
441 }
442 
443 //===----------------------------------------------------------------------===//
444 // Methods on ConversionSpecifier.
445 //===----------------------------------------------------------------------===//
446 
toString() const447 const char *ConversionSpecifier::toString() const {
448   switch (kind) {
449   case dArg: return "d";
450   case iArg: return "i";
451   case oArg: return "o";
452   case uArg: return "u";
453   case xArg: return "x";
454   case XArg: return "X";
455   case fArg: return "f";
456   case FArg: return "F";
457   case eArg: return "e";
458   case EArg: return "E";
459   case gArg: return "g";
460   case GArg: return "G";
461   case aArg: return "a";
462   case AArg: return "A";
463   case cArg: return "c";
464   case sArg: return "s";
465   case pArg: return "p";
466   case nArg: return "n";
467   case PercentArg:  return "%";
468   case ScanListArg: return "[";
469   case InvalidSpecifier: return NULL;
470 
471   // MacOS X unicode extensions.
472   case CArg: return "C";
473   case SArg: return "S";
474 
475   // Objective-C specific specifiers.
476   case ObjCObjArg: return "@";
477 
478   // GlibC specific specifiers.
479   case PrintErrno: return "m";
480   }
481   return NULL;
482 }
483 
484 //===----------------------------------------------------------------------===//
485 // Methods on OptionalAmount.
486 //===----------------------------------------------------------------------===//
487 
toString(raw_ostream & os) const488 void OptionalAmount::toString(raw_ostream &os) const {
489   switch (hs) {
490   case Invalid:
491   case NotSpecified:
492     return;
493   case Arg:
494     if (UsesDotPrefix)
495         os << ".";
496     if (usesPositionalArg())
497       os << "*" << getPositionalArgIndex() << "$";
498     else
499       os << "*";
500     break;
501   case Constant:
502     if (UsesDotPrefix)
503         os << ".";
504     os << amt;
505     break;
506   }
507 }
508 
hasValidLengthModifier() const509 bool FormatSpecifier::hasValidLengthModifier() const {
510   switch (LM.getKind()) {
511     case LengthModifier::None:
512       return true;
513 
514     // Handle most integer flags
515     case LengthModifier::AsChar:
516     case LengthModifier::AsShort:
517     case LengthModifier::AsLongLong:
518     case LengthModifier::AsQuad:
519     case LengthModifier::AsIntMax:
520     case LengthModifier::AsSizeT:
521     case LengthModifier::AsPtrDiff:
522       switch (CS.getKind()) {
523         case ConversionSpecifier::dArg:
524         case ConversionSpecifier::iArg:
525         case ConversionSpecifier::oArg:
526         case ConversionSpecifier::uArg:
527         case ConversionSpecifier::xArg:
528         case ConversionSpecifier::XArg:
529         case ConversionSpecifier::nArg:
530           return true;
531         default:
532           return false;
533       }
534 
535     // Handle 'l' flag
536     case LengthModifier::AsLong:
537       switch (CS.getKind()) {
538         case ConversionSpecifier::dArg:
539         case ConversionSpecifier::iArg:
540         case ConversionSpecifier::oArg:
541         case ConversionSpecifier::uArg:
542         case ConversionSpecifier::xArg:
543         case ConversionSpecifier::XArg:
544         case ConversionSpecifier::aArg:
545         case ConversionSpecifier::AArg:
546         case ConversionSpecifier::fArg:
547         case ConversionSpecifier::FArg:
548         case ConversionSpecifier::eArg:
549         case ConversionSpecifier::EArg:
550         case ConversionSpecifier::gArg:
551         case ConversionSpecifier::GArg:
552         case ConversionSpecifier::nArg:
553         case ConversionSpecifier::cArg:
554         case ConversionSpecifier::sArg:
555         case ConversionSpecifier::ScanListArg:
556           return true;
557         default:
558           return false;
559       }
560 
561     case LengthModifier::AsLongDouble:
562       switch (CS.getKind()) {
563         case ConversionSpecifier::aArg:
564         case ConversionSpecifier::AArg:
565         case ConversionSpecifier::fArg:
566         case ConversionSpecifier::FArg:
567         case ConversionSpecifier::eArg:
568         case ConversionSpecifier::EArg:
569         case ConversionSpecifier::gArg:
570         case ConversionSpecifier::GArg:
571           return true;
572         // GNU extension.
573         case ConversionSpecifier::dArg:
574         case ConversionSpecifier::iArg:
575         case ConversionSpecifier::oArg:
576         case ConversionSpecifier::uArg:
577         case ConversionSpecifier::xArg:
578         case ConversionSpecifier::XArg:
579           return true;
580         default:
581           return false;
582       }
583 
584     case LengthModifier::AsAllocate:
585       switch (CS.getKind()) {
586         case ConversionSpecifier::sArg:
587         case ConversionSpecifier::SArg:
588         case ConversionSpecifier::ScanListArg:
589           return true;
590         default:
591           return false;
592       }
593 
594     case LengthModifier::AsMAllocate:
595       switch (CS.getKind()) {
596         case ConversionSpecifier::cArg:
597         case ConversionSpecifier::CArg:
598         case ConversionSpecifier::sArg:
599         case ConversionSpecifier::SArg:
600         case ConversionSpecifier::ScanListArg:
601           return true;
602         default:
603           return false;
604       }
605   }
606   llvm_unreachable("Invalid LengthModifier Kind!");
607 }
608 
hasStandardLengthModifier() const609 bool FormatSpecifier::hasStandardLengthModifier() const {
610   switch (LM.getKind()) {
611     case LengthModifier::None:
612     case LengthModifier::AsChar:
613     case LengthModifier::AsShort:
614     case LengthModifier::AsLong:
615     case LengthModifier::AsLongLong:
616     case LengthModifier::AsIntMax:
617     case LengthModifier::AsSizeT:
618     case LengthModifier::AsPtrDiff:
619     case LengthModifier::AsLongDouble:
620       return true;
621     case LengthModifier::AsAllocate:
622     case LengthModifier::AsMAllocate:
623     case LengthModifier::AsQuad:
624       return false;
625   }
626   llvm_unreachable("Invalid LengthModifier Kind!");
627 }
628 
hasStandardConversionSpecifier(const LangOptions & LangOpt) const629 bool FormatSpecifier::hasStandardConversionSpecifier(const LangOptions &LangOpt) const {
630   switch (CS.getKind()) {
631     case ConversionSpecifier::cArg:
632     case ConversionSpecifier::dArg:
633     case ConversionSpecifier::iArg:
634     case ConversionSpecifier::oArg:
635     case ConversionSpecifier::uArg:
636     case ConversionSpecifier::xArg:
637     case ConversionSpecifier::XArg:
638     case ConversionSpecifier::fArg:
639     case ConversionSpecifier::FArg:
640     case ConversionSpecifier::eArg:
641     case ConversionSpecifier::EArg:
642     case ConversionSpecifier::gArg:
643     case ConversionSpecifier::GArg:
644     case ConversionSpecifier::aArg:
645     case ConversionSpecifier::AArg:
646     case ConversionSpecifier::sArg:
647     case ConversionSpecifier::pArg:
648     case ConversionSpecifier::nArg:
649     case ConversionSpecifier::ObjCObjArg:
650     case ConversionSpecifier::ScanListArg:
651     case ConversionSpecifier::PercentArg:
652       return true;
653     case ConversionSpecifier::CArg:
654     case ConversionSpecifier::SArg:
655       return LangOpt.ObjC1 || LangOpt.ObjC2;
656     case ConversionSpecifier::InvalidSpecifier:
657     case ConversionSpecifier::PrintErrno:
658       return false;
659   }
660   llvm_unreachable("Invalid ConversionSpecifier Kind!");
661 }
662 
hasStandardLengthConversionCombination() const663 bool FormatSpecifier::hasStandardLengthConversionCombination() const {
664   if (LM.getKind() == LengthModifier::AsLongDouble) {
665     switch(CS.getKind()) {
666         case ConversionSpecifier::dArg:
667         case ConversionSpecifier::iArg:
668         case ConversionSpecifier::oArg:
669         case ConversionSpecifier::uArg:
670         case ConversionSpecifier::xArg:
671         case ConversionSpecifier::XArg:
672           return false;
673         default:
674           return true;
675     }
676   }
677   return true;
678 }
679