1 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines APIs for analyzing the format strings of printf, fscanf, 11 // and friends. 12 // 13 // The structure of format strings for fprintf are described in C99 7.19.6.1. 14 // 15 // The structure of format strings for fscanf are described in C99 7.19.6.2. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #ifndef LLVM_CLANG_FORMAT_H 20 #define LLVM_CLANG_FORMAT_H 21 22 #include "clang/AST/CanonicalType.h" 23 24 namespace clang { 25 26 class TargetInfo; 27 28 //===----------------------------------------------------------------------===// 29 /// Common components of both fprintf and fscanf format strings. 30 namespace analyze_format_string { 31 32 /// Class representing optional flags with location and representation 33 /// information. 34 class OptionalFlag { 35 public: OptionalFlag(const char * Representation)36 OptionalFlag(const char *Representation) 37 : representation(Representation), flag(false) {} isSet()38 bool isSet() { return flag; } set()39 void set() { flag = true; } clear()40 void clear() { flag = false; } setPosition(const char * position)41 void setPosition(const char *position) { 42 assert(position); 43 this->position = position; 44 } getPosition()45 const char *getPosition() const { 46 assert(position); 47 return position; 48 } toString()49 const char *toString() const { return representation; } 50 51 // Overloaded operators for bool like qualities 52 LLVM_EXPLICIT operator bool() const { return flag; } 53 OptionalFlag& operator=(const bool &rhs) { 54 flag = rhs; 55 return *this; // Return a reference to myself. 56 } 57 private: 58 const char *representation; 59 const char *position; 60 bool flag; 61 }; 62 63 /// Represents the length modifier in a format string in scanf/printf. 64 class LengthModifier { 65 public: 66 enum Kind { 67 None, 68 AsChar, // 'hh' 69 AsShort, // 'h' 70 AsLong, // 'l' 71 AsLongLong, // 'll' 72 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 73 AsIntMax, // 'j' 74 AsSizeT, // 'z' 75 AsPtrDiff, // 't' 76 AsLongDouble, // 'L' 77 AsAllocate, // for '%as', GNU extension to C90 scanf 78 AsMAllocate, // for '%ms', GNU extension to scanf 79 AsWideChar = AsLong // for '%ls', only makes sense for printf 80 }; 81 LengthModifier()82 LengthModifier() 83 : Position(0), kind(None) {} LengthModifier(const char * pos,Kind k)84 LengthModifier(const char *pos, Kind k) 85 : Position(pos), kind(k) {} 86 getStart()87 const char *getStart() const { 88 return Position; 89 } 90 getLength()91 unsigned getLength() const { 92 switch (kind) { 93 default: 94 return 1; 95 case AsLongLong: 96 case AsChar: 97 return 2; 98 case None: 99 return 0; 100 } 101 } 102 getKind()103 Kind getKind() const { return kind; } setKind(Kind k)104 void setKind(Kind k) { kind = k; } 105 106 const char *toString() const; 107 108 private: 109 const char *Position; 110 Kind kind; 111 }; 112 113 class ConversionSpecifier { 114 public: 115 enum Kind { 116 InvalidSpecifier = 0, 117 // C99 conversion specifiers. 118 cArg, 119 dArg, 120 DArg, // Apple extension 121 iArg, 122 IntArgBeg = dArg, IntArgEnd = iArg, 123 124 oArg, 125 OArg, // Apple extension 126 uArg, 127 UArg, // Apple extension 128 xArg, 129 XArg, 130 UIntArgBeg = oArg, UIntArgEnd = XArg, 131 132 fArg, 133 FArg, 134 eArg, 135 EArg, 136 gArg, 137 GArg, 138 aArg, 139 AArg, 140 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 141 142 sArg, 143 pArg, 144 nArg, 145 PercentArg, 146 CArg, 147 SArg, 148 149 // ** Printf-specific ** 150 151 // Objective-C specific specifiers. 152 ObjCObjArg, // '@' 153 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 154 155 // GlibC specific specifiers. 156 PrintErrno, // 'm' 157 158 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 159 160 // ** Scanf-specific ** 161 ScanListArg, // '[' 162 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 163 }; 164 165 ConversionSpecifier(bool isPrintf = true) IsPrintf(isPrintf)166 : IsPrintf(isPrintf), Position(0), EndScanList(0), kind(InvalidSpecifier) {} 167 ConversionSpecifier(bool isPrintf,const char * pos,Kind k)168 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 169 : IsPrintf(isPrintf), Position(pos), EndScanList(0), kind(k) {} 170 getStart()171 const char *getStart() const { 172 return Position; 173 } 174 getCharacters()175 StringRef getCharacters() const { 176 return StringRef(getStart(), getLength()); 177 } 178 consumesDataArgument()179 bool consumesDataArgument() const { 180 switch (kind) { 181 case PrintErrno: 182 assert(IsPrintf); 183 return false; 184 case PercentArg: 185 return false; 186 default: 187 return true; 188 } 189 } 190 getKind()191 Kind getKind() const { return kind; } setKind(Kind k)192 void setKind(Kind k) { kind = k; } getLength()193 unsigned getLength() const { 194 return EndScanList ? EndScanList - Position : 1; 195 } 196 isIntArg()197 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } isUIntArg()198 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } isAnyIntArg()199 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } 200 const char *toString() const; 201 isPrintfKind()202 bool isPrintfKind() const { return IsPrintf; } 203 204 Optional<ConversionSpecifier> getStandardSpecifier() const; 205 206 protected: 207 bool IsPrintf; 208 const char *Position; 209 const char *EndScanList; 210 Kind kind; 211 }; 212 213 class ArgType { 214 public: 215 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 216 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 217 private: 218 const Kind K; 219 QualType T; 220 const char *Name; 221 bool Ptr; 222 public: K(k)223 ArgType(Kind k = UnknownTy, const char *n = 0) : K(k), Name(n), Ptr(false) {} 224 ArgType(QualType t, const char *n = 0) K(SpecificTy)225 : K(SpecificTy), T(t), Name(n), Ptr(false) {} ArgType(CanQualType t)226 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(0), Ptr(false) {} 227 Invalid()228 static ArgType Invalid() { return ArgType(InvalidTy); } isValid()229 bool isValid() const { return K != InvalidTy; } 230 231 /// Create an ArgType which corresponds to the type pointer to A. PtrTo(const ArgType & A)232 static ArgType PtrTo(const ArgType& A) { 233 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown"); 234 ArgType Res = A; 235 Res.Ptr = true; 236 return Res; 237 } 238 239 bool matchesType(ASTContext &C, QualType argTy) const; 240 241 QualType getRepresentativeType(ASTContext &C) const; 242 243 std::string getRepresentativeTypeName(ASTContext &C) const; 244 }; 245 246 class OptionalAmount { 247 public: 248 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 249 OptionalAmount(HowSpecified howSpecified,unsigned amount,const char * amountStart,unsigned amountLength,bool usesPositionalArg)250 OptionalAmount(HowSpecified howSpecified, 251 unsigned amount, 252 const char *amountStart, 253 unsigned amountLength, 254 bool usesPositionalArg) 255 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 256 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 257 258 OptionalAmount(bool valid = true) 259 : start(0),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 260 UsesPositionalArg(0), UsesDotPrefix(0) {} 261 isInvalid()262 bool isInvalid() const { 263 return hs == Invalid; 264 } 265 getHowSpecified()266 HowSpecified getHowSpecified() const { return hs; } setHowSpecified(HowSpecified h)267 void setHowSpecified(HowSpecified h) { hs = h; } 268 hasDataArgument()269 bool hasDataArgument() const { return hs == Arg; } 270 getArgIndex()271 unsigned getArgIndex() const { 272 assert(hasDataArgument()); 273 return amt; 274 } 275 getConstantAmount()276 unsigned getConstantAmount() const { 277 assert(hs == Constant); 278 return amt; 279 } 280 getStart()281 const char *getStart() const { 282 // We include the . character if it is given. 283 return start - UsesDotPrefix; 284 } 285 getConstantLength()286 unsigned getConstantLength() const { 287 assert(hs == Constant); 288 return length + UsesDotPrefix; 289 } 290 291 ArgType getArgType(ASTContext &Ctx) const; 292 293 void toString(raw_ostream &os) const; 294 usesPositionalArg()295 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } getPositionalArgIndex()296 unsigned getPositionalArgIndex() const { 297 assert(hasDataArgument()); 298 return amt + 1; 299 } 300 usesDotPrefix()301 bool usesDotPrefix() const { return UsesDotPrefix; } setUsesDotPrefix()302 void setUsesDotPrefix() { UsesDotPrefix = true; } 303 304 private: 305 const char *start; 306 unsigned length; 307 HowSpecified hs; 308 unsigned amt; 309 bool UsesPositionalArg : 1; 310 bool UsesDotPrefix; 311 }; 312 313 314 class FormatSpecifier { 315 protected: 316 LengthModifier LM; 317 OptionalAmount FieldWidth; 318 ConversionSpecifier CS; 319 /// Positional arguments, an IEEE extension: 320 /// IEEE Std 1003.1, 2004 Edition 321 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 322 bool UsesPositionalArg; 323 unsigned argIndex; 324 public: FormatSpecifier(bool isPrintf)325 FormatSpecifier(bool isPrintf) 326 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 327 setLengthModifier(LengthModifier lm)328 void setLengthModifier(LengthModifier lm) { 329 LM = lm; 330 } 331 setUsesPositionalArg()332 void setUsesPositionalArg() { UsesPositionalArg = true; } 333 setArgIndex(unsigned i)334 void setArgIndex(unsigned i) { 335 argIndex = i; 336 } 337 getArgIndex()338 unsigned getArgIndex() const { 339 return argIndex; 340 } 341 getPositionalArgIndex()342 unsigned getPositionalArgIndex() const { 343 return argIndex + 1; 344 } 345 getLengthModifier()346 const LengthModifier &getLengthModifier() const { 347 return LM; 348 } 349 getFieldWidth()350 const OptionalAmount &getFieldWidth() const { 351 return FieldWidth; 352 } 353 setFieldWidth(const OptionalAmount & Amt)354 void setFieldWidth(const OptionalAmount &Amt) { 355 FieldWidth = Amt; 356 } 357 usesPositionalArg()358 bool usesPositionalArg() const { return UsesPositionalArg; } 359 360 bool hasValidLengthModifier(const TargetInfo &Target) const; 361 362 bool hasStandardLengthModifier() const; 363 364 Optional<LengthModifier> getCorrectedLengthModifier() const; 365 366 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 367 368 bool hasStandardLengthConversionCombination() const; 369 370 /// For a TypedefType QT, if it is a named integer type such as size_t, 371 /// assign the appropriate value to LM and return true. 372 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 373 }; 374 375 } // end analyze_format_string namespace 376 377 //===----------------------------------------------------------------------===// 378 /// Pieces specific to fprintf format strings. 379 380 namespace analyze_printf { 381 382 class PrintfConversionSpecifier : 383 public analyze_format_string::ConversionSpecifier { 384 public: PrintfConversionSpecifier()385 PrintfConversionSpecifier() 386 : ConversionSpecifier(true, 0, InvalidSpecifier) {} 387 PrintfConversionSpecifier(const char * pos,Kind k)388 PrintfConversionSpecifier(const char *pos, Kind k) 389 : ConversionSpecifier(true, pos, k) {} 390 isObjCArg()391 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } isDoubleArg()392 bool isDoubleArg() const { return kind >= DoubleArgBeg && 393 kind <= DoubleArgEnd; } getLength()394 unsigned getLength() const { 395 // Conversion specifiers currently only are represented by 396 // single characters, but we be flexible. 397 return 1; 398 } 399 classof(const analyze_format_string::ConversionSpecifier * CS)400 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 401 return CS->isPrintfKind(); 402 } 403 }; 404 405 using analyze_format_string::ArgType; 406 using analyze_format_string::LengthModifier; 407 using analyze_format_string::OptionalAmount; 408 using analyze_format_string::OptionalFlag; 409 410 class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 411 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 412 OptionalFlag IsLeftJustified; // '-' 413 OptionalFlag HasPlusPrefix; // '+' 414 OptionalFlag HasSpacePrefix; // ' ' 415 OptionalFlag HasAlternativeForm; // '#' 416 OptionalFlag HasLeadingZeroes; // '0' 417 OptionalAmount Precision; 418 public: PrintfSpecifier()419 PrintfSpecifier() : 420 FormatSpecifier(/* isPrintf = */ true), 421 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 422 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 423 424 static PrintfSpecifier Parse(const char *beg, const char *end); 425 426 // Methods for incrementally constructing the PrintfSpecifier. setConversionSpecifier(const PrintfConversionSpecifier & cs)427 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 428 CS = cs; 429 } setHasThousandsGrouping(const char * position)430 void setHasThousandsGrouping(const char *position) { 431 HasThousandsGrouping = true; 432 HasThousandsGrouping.setPosition(position); 433 } setIsLeftJustified(const char * position)434 void setIsLeftJustified(const char *position) { 435 IsLeftJustified = true; 436 IsLeftJustified.setPosition(position); 437 } setHasPlusPrefix(const char * position)438 void setHasPlusPrefix(const char *position) { 439 HasPlusPrefix = true; 440 HasPlusPrefix.setPosition(position); 441 } setHasSpacePrefix(const char * position)442 void setHasSpacePrefix(const char *position) { 443 HasSpacePrefix = true; 444 HasSpacePrefix.setPosition(position); 445 } setHasAlternativeForm(const char * position)446 void setHasAlternativeForm(const char *position) { 447 HasAlternativeForm = true; 448 HasAlternativeForm.setPosition(position); 449 } setHasLeadingZeros(const char * position)450 void setHasLeadingZeros(const char *position) { 451 HasLeadingZeroes = true; 452 HasLeadingZeroes.setPosition(position); 453 } setUsesPositionalArg()454 void setUsesPositionalArg() { UsesPositionalArg = true; } 455 456 // Methods for querying the format specifier. 457 getConversionSpecifier()458 const PrintfConversionSpecifier &getConversionSpecifier() const { 459 return cast<PrintfConversionSpecifier>(CS); 460 } 461 setPrecision(const OptionalAmount & Amt)462 void setPrecision(const OptionalAmount &Amt) { 463 Precision = Amt; 464 Precision.setUsesDotPrefix(); 465 } 466 getPrecision()467 const OptionalAmount &getPrecision() const { 468 return Precision; 469 } 470 consumesDataArgument()471 bool consumesDataArgument() const { 472 return getConversionSpecifier().consumesDataArgument(); 473 } 474 475 /// \brief Returns the builtin type that a data argument 476 /// paired with this format specifier should have. This method 477 /// will return null if the format specifier does not have 478 /// a matching data argument or the matching argument matches 479 /// more than one type. 480 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 481 hasThousandsGrouping()482 const OptionalFlag &hasThousandsGrouping() const { 483 return HasThousandsGrouping; 484 } isLeftJustified()485 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } hasPlusPrefix()486 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } hasAlternativeForm()487 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } hasLeadingZeros()488 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } hasSpacePrefix()489 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } usesPositionalArg()490 bool usesPositionalArg() const { return UsesPositionalArg; } 491 492 /// Changes the specifier and length according to a QualType, retaining any 493 /// flags or options. Returns true on success, or false when a conversion 494 /// was not successful. 495 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 496 bool IsObjCLiteral); 497 498 void toString(raw_ostream &os) const; 499 500 // Validation methods - to check if any element results in undefined behavior 501 bool hasValidPlusPrefix() const; 502 bool hasValidAlternativeForm() const; 503 bool hasValidLeadingZeros() const; 504 bool hasValidSpacePrefix() const; 505 bool hasValidLeftJustified() const; 506 bool hasValidThousandsGroupingPrefix() const; 507 508 bool hasValidPrecision() const; 509 bool hasValidFieldWidth() const; 510 }; 511 } // end analyze_printf namespace 512 513 //===----------------------------------------------------------------------===// 514 /// Pieces specific to fscanf format strings. 515 516 namespace analyze_scanf { 517 518 class ScanfConversionSpecifier : 519 public analyze_format_string::ConversionSpecifier { 520 public: ScanfConversionSpecifier()521 ScanfConversionSpecifier() 522 : ConversionSpecifier(false, 0, InvalidSpecifier) {} 523 ScanfConversionSpecifier(const char * pos,Kind k)524 ScanfConversionSpecifier(const char *pos, Kind k) 525 : ConversionSpecifier(false, pos, k) {} 526 setEndScanList(const char * pos)527 void setEndScanList(const char *pos) { EndScanList = pos; } 528 classof(const analyze_format_string::ConversionSpecifier * CS)529 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 530 return !CS->isPrintfKind(); 531 } 532 }; 533 534 using analyze_format_string::ArgType; 535 using analyze_format_string::LengthModifier; 536 using analyze_format_string::OptionalAmount; 537 using analyze_format_string::OptionalFlag; 538 539 class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 540 OptionalFlag SuppressAssignment; // '*' 541 public: ScanfSpecifier()542 ScanfSpecifier() : 543 FormatSpecifier(/* isPrintf = */ false), 544 SuppressAssignment("*") {} 545 setSuppressAssignment(const char * position)546 void setSuppressAssignment(const char *position) { 547 SuppressAssignment = true; 548 SuppressAssignment.setPosition(position); 549 } 550 getSuppressAssignment()551 const OptionalFlag &getSuppressAssignment() const { 552 return SuppressAssignment; 553 } 554 setConversionSpecifier(const ScanfConversionSpecifier & cs)555 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 556 CS = cs; 557 } 558 getConversionSpecifier()559 const ScanfConversionSpecifier &getConversionSpecifier() const { 560 return cast<ScanfConversionSpecifier>(CS); 561 } 562 consumesDataArgument()563 bool consumesDataArgument() const { 564 return CS.consumesDataArgument() && !SuppressAssignment; 565 } 566 567 ArgType getArgType(ASTContext &Ctx) const; 568 569 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx); 570 571 void toString(raw_ostream &os) const; 572 573 static ScanfSpecifier Parse(const char *beg, const char *end); 574 }; 575 576 } // end analyze_scanf namespace 577 578 //===----------------------------------------------------------------------===// 579 // Parsing and processing of format strings (both fprintf and fscanf). 580 581 namespace analyze_format_string { 582 583 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 584 585 class FormatStringHandler { 586 public: FormatStringHandler()587 FormatStringHandler() {} 588 virtual ~FormatStringHandler(); 589 HandleNullChar(const char * nullCharacter)590 virtual void HandleNullChar(const char *nullCharacter) {} 591 HandlePosition(const char * startPos,unsigned posLen)592 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 593 HandleInvalidPosition(const char * startPos,unsigned posLen,PositionContext p)594 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 595 PositionContext p) {} 596 HandleZeroPosition(const char * startPos,unsigned posLen)597 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 598 HandleIncompleteSpecifier(const char * startSpecifier,unsigned specifierLen)599 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 600 unsigned specifierLen) {} 601 602 // Printf-specific handlers. 603 HandleInvalidPrintfConversionSpecifier(const analyze_printf::PrintfSpecifier & FS,const char * startSpecifier,unsigned specifierLen)604 virtual bool HandleInvalidPrintfConversionSpecifier( 605 const analyze_printf::PrintfSpecifier &FS, 606 const char *startSpecifier, 607 unsigned specifierLen) { 608 return true; 609 } 610 HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier & FS,const char * startSpecifier,unsigned specifierLen)611 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 612 const char *startSpecifier, 613 unsigned specifierLen) { 614 return true; 615 } 616 617 // Scanf-specific handlers. 618 HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier & FS,const char * startSpecifier,unsigned specifierLen)619 virtual bool HandleInvalidScanfConversionSpecifier( 620 const analyze_scanf::ScanfSpecifier &FS, 621 const char *startSpecifier, 622 unsigned specifierLen) { 623 return true; 624 } 625 HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier & FS,const char * startSpecifier,unsigned specifierLen)626 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 627 const char *startSpecifier, 628 unsigned specifierLen) { 629 return true; 630 } 631 HandleIncompleteScanList(const char * start,const char * end)632 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 633 }; 634 635 bool ParsePrintfString(FormatStringHandler &H, 636 const char *beg, const char *end, const LangOptions &LO, 637 const TargetInfo &Target); 638 639 bool ParseScanfString(FormatStringHandler &H, 640 const char *beg, const char *end, const LangOptions &LO, 641 const TargetInfo &Target); 642 643 } // end analyze_format_string namespace 644 } // end clang namespace 645 #endif 646