• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the NumericLiteralParser, CharLiteralParser, and
11 // StringLiteralParser interfaces.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef CLANG_LITERALSUPPORT_H
16 #define CLANG_LITERALSUPPORT_H
17 
18 #include "clang/Basic/CharInfo.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/DataTypes.h"
25 
26 namespace clang {
27 
28 class DiagnosticsEngine;
29 class Preprocessor;
30 class Token;
31 class SourceLocation;
32 class TargetInfo;
33 class SourceManager;
34 class LangOptions;
35 
36 /// NumericLiteralParser - This performs strict semantic analysis of the content
37 /// of a ppnumber, classifying it as either integer, floating, or erroneous,
38 /// determines the radix of the value and can convert it to a useful value.
39 class NumericLiteralParser {
40   Preprocessor &PP; // needed for diagnostics
41 
42   const char *const ThisTokBegin;
43   const char *const ThisTokEnd;
44   const char *DigitsBegin, *SuffixBegin; // markers
45   const char *s; // cursor
46 
47   unsigned radix;
48 
49   bool saw_exponent, saw_period, saw_ud_suffix;
50 
51 public:
52   NumericLiteralParser(StringRef TokSpelling,
53                        SourceLocation TokLoc,
54                        Preprocessor &PP);
55   bool hadError;
56   bool isUnsigned;
57   bool isLong;        // This is *not* set for long long.
58   bool isLongLong;
59   bool isFloat;       // 1.0f
60   bool isImaginary;   // 1.0i
61   bool isMicrosoftInteger;  // Microsoft suffix extension i8, i16, i32, or i64.
62 
isIntegerLiteral()63   bool isIntegerLiteral() const {
64     return !saw_period && !saw_exponent;
65   }
isFloatingLiteral()66   bool isFloatingLiteral() const {
67     return saw_period || saw_exponent;
68   }
69 
hasUDSuffix()70   bool hasUDSuffix() const {
71     return saw_ud_suffix;
72   }
getUDSuffix()73   StringRef getUDSuffix() const {
74     assert(saw_ud_suffix);
75     return StringRef(SuffixBegin, ThisTokEnd - SuffixBegin);
76   }
getUDSuffixOffset()77   unsigned getUDSuffixOffset() const {
78     assert(saw_ud_suffix);
79     return SuffixBegin - ThisTokBegin;
80   }
81 
82   static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
83 
getRadix()84   unsigned getRadix() const { return radix; }
85 
86   /// GetIntegerValue - Convert this numeric literal value to an APInt that
87   /// matches Val's input width.  If there is an overflow (i.e., if the unsigned
88   /// value read is larger than the APInt's bits will hold), set Val to the low
89   /// bits of the result and return true.  Otherwise, return false.
90   bool GetIntegerValue(llvm::APInt &Val);
91 
92   /// GetFloatValue - Convert this numeric literal to a floating value, using
93   /// the specified APFloat fltSemantics (specifying float, double, etc).
94   /// The optional bool isExact (passed-by-reference) has its value
95   /// set to true if the returned APFloat can represent the number in the
96   /// literal exactly, and false otherwise.
97   llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
98 
99 private:
100 
101   void ParseNumberStartingWithZero(SourceLocation TokLoc);
102 
103   /// SkipHexDigits - Read and skip over any hex digits, up to End.
104   /// Return a pointer to the first non-hex digit or End.
SkipHexDigits(const char * ptr)105   const char *SkipHexDigits(const char *ptr) {
106     while (ptr != ThisTokEnd && isHexDigit(*ptr))
107       ptr++;
108     return ptr;
109   }
110 
111   /// SkipOctalDigits - Read and skip over any octal digits, up to End.
112   /// Return a pointer to the first non-hex digit or End.
SkipOctalDigits(const char * ptr)113   const char *SkipOctalDigits(const char *ptr) {
114     while (ptr != ThisTokEnd && ((*ptr >= '0') && (*ptr <= '7')))
115       ptr++;
116     return ptr;
117   }
118 
119   /// SkipDigits - Read and skip over any digits, up to End.
120   /// Return a pointer to the first non-hex digit or End.
SkipDigits(const char * ptr)121   const char *SkipDigits(const char *ptr) {
122     while (ptr != ThisTokEnd && isDigit(*ptr))
123       ptr++;
124     return ptr;
125   }
126 
127   /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
128   /// Return a pointer to the first non-binary digit or End.
SkipBinaryDigits(const char * ptr)129   const char *SkipBinaryDigits(const char *ptr) {
130     while (ptr != ThisTokEnd && (*ptr == '0' || *ptr == '1'))
131       ptr++;
132     return ptr;
133   }
134 
135 };
136 
137 /// CharLiteralParser - Perform interpretation and semantic analysis of a
138 /// character literal.
139 class CharLiteralParser {
140   uint64_t Value;
141   tok::TokenKind Kind;
142   bool IsMultiChar;
143   bool HadError;
144   SmallString<32> UDSuffixBuf;
145   unsigned UDSuffixOffset;
146 public:
147   CharLiteralParser(const char *begin, const char *end,
148                     SourceLocation Loc, Preprocessor &PP,
149                     tok::TokenKind kind);
150 
hadError()151   bool hadError() const { return HadError; }
isAscii()152   bool isAscii() const { return Kind == tok::char_constant; }
isWide()153   bool isWide() const { return Kind == tok::wide_char_constant; }
isUTF16()154   bool isUTF16() const { return Kind == tok::utf16_char_constant; }
isUTF32()155   bool isUTF32() const { return Kind == tok::utf32_char_constant; }
isMultiChar()156   bool isMultiChar() const { return IsMultiChar; }
getValue()157   uint64_t getValue() const { return Value; }
getUDSuffix()158   StringRef getUDSuffix() const { return UDSuffixBuf; }
getUDSuffixOffset()159   unsigned getUDSuffixOffset() const {
160     assert(!UDSuffixBuf.empty() && "no ud-suffix");
161     return UDSuffixOffset;
162   }
163 };
164 
165 /// StringLiteralParser - This decodes string escape characters and performs
166 /// wide string analysis and Translation Phase #6 (concatenation of string
167 /// literals) (C99 5.1.1.2p1).
168 class StringLiteralParser {
169   const SourceManager &SM;
170   const LangOptions &Features;
171   const TargetInfo &Target;
172   DiagnosticsEngine *Diags;
173 
174   unsigned MaxTokenLength;
175   unsigned SizeBound;
176   unsigned CharByteWidth;
177   tok::TokenKind Kind;
178   SmallString<512> ResultBuf;
179   char *ResultPtr; // cursor
180   SmallString<32> UDSuffixBuf;
181   unsigned UDSuffixToken;
182   unsigned UDSuffixOffset;
183 public:
184   StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
185                       Preprocessor &PP, bool Complain = true);
186   StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
187                       const SourceManager &sm, const LangOptions &features,
188                       const TargetInfo &target, DiagnosticsEngine *diags = 0)
SM(sm)189     : SM(sm), Features(features), Target(target), Diags(diags),
190       MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
191       ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
192     init(StringToks, NumStringToks);
193   }
194 
195 
196   bool hadError;
197   bool Pascal;
198 
GetString()199   StringRef GetString() const {
200     return StringRef(ResultBuf.data(), GetStringLength());
201   }
GetStringLength()202   unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
203 
GetNumStringChars()204   unsigned GetNumStringChars() const {
205     return GetStringLength() / CharByteWidth;
206   }
207   /// getOffsetOfStringByte - This function returns the offset of the
208   /// specified byte of the string data represented by Token.  This handles
209   /// advancing over escape sequences in the string.
210   ///
211   /// If the Diagnostics pointer is non-null, then this will do semantic
212   /// checking of the string literal and emit errors and warnings.
213   unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
214 
isAscii()215   bool isAscii() const { return Kind == tok::string_literal; }
isWide()216   bool isWide() const { return Kind == tok::wide_string_literal; }
isUTF8()217   bool isUTF8() const { return Kind == tok::utf8_string_literal; }
isUTF16()218   bool isUTF16() const { return Kind == tok::utf16_string_literal; }
isUTF32()219   bool isUTF32() const { return Kind == tok::utf32_string_literal; }
isPascal()220   bool isPascal() const { return Pascal; }
221 
getUDSuffix()222   StringRef getUDSuffix() const { return UDSuffixBuf; }
223 
224   /// Get the index of a token containing a ud-suffix.
getUDSuffixToken()225   unsigned getUDSuffixToken() const {
226     assert(!UDSuffixBuf.empty() && "no ud-suffix");
227     return UDSuffixToken;
228   }
229   /// Get the spelling offset of the first byte of the ud-suffix.
getUDSuffixOffset()230   unsigned getUDSuffixOffset() const {
231     assert(!UDSuffixBuf.empty() && "no ud-suffix");
232     return UDSuffixOffset;
233   }
234 
235 private:
236   void init(const Token *StringToks, unsigned NumStringToks);
237   bool CopyStringFragment(const Token &Tok, const char *TokBegin,
238                           StringRef Fragment);
239   void DiagnoseLexingError(SourceLocation Loc);
240 };
241 
242 }  // end namespace clang
243 
244 #endif
245