1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 11 #define LLVM_MC_MCPARSER_MCASMLEXER_H 12 13 #include "llvm/ADT/APInt.h" 14 #include "llvm/ADT/StringRef.h" 15 #include "llvm/Support/Compiler.h" 16 #include "llvm/Support/DataTypes.h" 17 #include "llvm/Support/SMLoc.h" 18 19 namespace llvm { 20 21 /// AsmToken - Target independent representation for an assembler token. 22 class AsmToken { 23 public: 24 enum TokenKind { 25 // Markers 26 Eof, Error, 27 28 // String values. 29 Identifier, 30 String, 31 32 // Integer values. 33 Integer, 34 BigNum, // larger than 64 bits 35 36 // Real values. 37 Real, 38 39 // No-value. 40 EndOfStatement, 41 Colon, 42 Space, 43 Plus, Minus, Tilde, 44 Slash, // '/' 45 BackSlash, // '\' 46 LParen, RParen, LBrac, RBrac, LCurly, RCurly, 47 Star, Dot, Comma, Dollar, Equal, EqualEqual, 48 49 Pipe, PipePipe, Caret, 50 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 51 Less, LessEqual, LessLess, LessGreater, 52 Greater, GreaterEqual, GreaterGreater, At 53 }; 54 55 private: 56 TokenKind Kind; 57 58 /// A reference to the entire token contents; this is always a pointer into 59 /// a memory buffer owned by the source manager. 60 StringRef Str; 61 62 APInt IntVal; 63 64 public: AsmToken()65 AsmToken() {} AsmToken(TokenKind _Kind,StringRef _Str,APInt _IntVal)66 AsmToken(TokenKind _Kind, StringRef _Str, APInt _IntVal) 67 : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} 68 AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0) Kind(_Kind)69 : Kind(_Kind), Str(_Str), IntVal(64, _IntVal, true) {} 70 getKind()71 TokenKind getKind() const { return Kind; } is(TokenKind K)72 bool is(TokenKind K) const { return Kind == K; } isNot(TokenKind K)73 bool isNot(TokenKind K) const { return Kind != K; } 74 75 SMLoc getLoc() const; 76 SMLoc getEndLoc() const; 77 78 /// getStringContents - Get the contents of a string token (without quotes). getStringContents()79 StringRef getStringContents() const { 80 assert(Kind == String && "This token isn't a string!"); 81 return Str.slice(1, Str.size() - 1); 82 } 83 84 /// getIdentifier - Get the identifier string for the current token, which 85 /// should be an identifier or a string. This gets the portion of the string 86 /// which should be used as the identifier, e.g., it does not include the 87 /// quotes on strings. getIdentifier()88 StringRef getIdentifier() const { 89 if (Kind == Identifier) 90 return getString(); 91 return getStringContents(); 92 } 93 94 /// getString - Get the string for the current token, this includes all 95 /// characters (for example, the quotes on strings) in the token. 96 /// 97 /// The returned StringRef points into the source manager's memory buffer, and 98 /// is safe to store across calls to Lex(). getString()99 StringRef getString() const { return Str; } 100 101 // FIXME: Don't compute this in advance, it makes every token larger, and is 102 // also not generally what we want (it is nicer for recovery etc. to lex 123br 103 // as a single token, then diagnose as an invalid number). getIntVal()104 int64_t getIntVal() const { 105 assert(Kind == Integer && "This token isn't an integer!"); 106 return IntVal.getZExtValue(); 107 } 108 getAPIntVal()109 APInt getAPIntVal() const { 110 assert((Kind == Integer || Kind == BigNum) && 111 "This token isn't an integer!"); 112 return IntVal; 113 } 114 }; 115 116 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific 117 /// assembly lexers. 118 class MCAsmLexer { 119 /// The current token, stored in the base class for faster access. 120 AsmToken CurTok; 121 122 /// The location and description of the current error 123 SMLoc ErrLoc; 124 std::string Err; 125 126 MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 127 void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION; 128 protected: // Can only create subclasses. 129 const char *TokStart; 130 bool SkipSpace; 131 bool AllowAtInIdentifier; 132 133 MCAsmLexer(); 134 135 virtual AsmToken LexToken() = 0; 136 SetError(const SMLoc & errLoc,const std::string & err)137 void SetError(const SMLoc &errLoc, const std::string &err) { 138 ErrLoc = errLoc; 139 Err = err; 140 } 141 142 public: 143 virtual ~MCAsmLexer(); 144 145 /// Lex - Consume the next token from the input stream and return it. 146 /// 147 /// The lexer will continuosly return the end-of-file token once the end of 148 /// the main input file has been reached. Lex()149 const AsmToken &Lex() { 150 return CurTok = LexToken(); 151 } 152 153 virtual StringRef LexUntilEndOfStatement() = 0; 154 155 /// getLoc - Get the current source location. 156 SMLoc getLoc() const; 157 158 /// getTok - Get the current (last) lexed token. getTok()159 const AsmToken &getTok() { 160 return CurTok; 161 } 162 163 /// peekTok - Look ahead at the next token to be lexed. 164 virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0; 165 166 /// getErrLoc - Get the current error location getErrLoc()167 const SMLoc &getErrLoc() { 168 return ErrLoc; 169 } 170 171 /// getErr - Get the current error string getErr()172 const std::string &getErr() { 173 return Err; 174 } 175 176 /// getKind - Get the kind of current token. getKind()177 AsmToken::TokenKind getKind() const { return CurTok.getKind(); } 178 179 /// is - Check if the current token has kind \p K. is(AsmToken::TokenKind K)180 bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } 181 182 /// isNot - Check if the current token has kind \p K. isNot(AsmToken::TokenKind K)183 bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } 184 185 /// setSkipSpace - Set whether spaces should be ignored by the lexer setSkipSpace(bool val)186 void setSkipSpace(bool val) { SkipSpace = val; } 187 getAllowAtInIdentifier()188 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } setAllowAtInIdentifier(bool v)189 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } 190 }; 191 192 } // End llvm namespace 193 194 #endif 195