1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_MC_MCASMLEXER_H 11 #define LLVM_MC_MCASMLEXER_H 12 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/Support/DataTypes.h" 15 #include "llvm/Support/SMLoc.h" 16 17 namespace llvm { 18 class MCAsmLexer; 19 class MCInst; 20 21 /// AsmToken - Target independent representation for an assembler token. 22 class AsmToken { 23 public: 24 enum TokenKind { 25 // Markers 26 Eof, Error, 27 28 // String values. 29 Identifier, 30 String, 31 32 // Integer values. 33 Integer, 34 35 // Real values. 36 Real, 37 38 // Register values (stored in IntVal). Only used by MCTargetAsmLexer. 39 Register, 40 41 // No-value. 42 EndOfStatement, 43 Colon, 44 Plus, Minus, Tilde, 45 Slash, // '/' 46 BackSlash, // '\' 47 LParen, RParen, LBrac, RBrac, LCurly, RCurly, 48 Star, Dot, Comma, Dollar, Equal, EqualEqual, 49 50 Pipe, PipePipe, Caret, 51 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 52 Less, LessEqual, LessLess, LessGreater, 53 Greater, GreaterEqual, GreaterGreater, At 54 }; 55 56 TokenKind Kind; 57 58 /// A reference to the entire token contents; this is always a pointer into 59 /// a memory buffer owned by the source manager. 60 StringRef Str; 61 62 int64_t IntVal; 63 64 public: AsmToken()65 AsmToken() {} 66 AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0) Kind(_Kind)67 : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} 68 getKind()69 TokenKind getKind() const { return Kind; } is(TokenKind K)70 bool is(TokenKind K) const { return Kind == K; } isNot(TokenKind K)71 bool isNot(TokenKind K) const { return Kind != K; } 72 73 SMLoc getLoc() const; 74 75 /// getStringContents - Get the contents of a string token (without quotes). getStringContents()76 StringRef getStringContents() const { 77 assert(Kind == String && "This token isn't a string!"); 78 return Str.slice(1, Str.size() - 1); 79 } 80 81 /// getIdentifier - Get the identifier string for the current token, which 82 /// should be an identifier or a string. This gets the portion of the string 83 /// which should be used as the identifier, e.g., it does not include the 84 /// quotes on strings. getIdentifier()85 StringRef getIdentifier() const { 86 if (Kind == Identifier) 87 return getString(); 88 return getStringContents(); 89 } 90 91 /// getString - Get the string for the current token, this includes all 92 /// characters (for example, the quotes on strings) in the token. 93 /// 94 /// The returned StringRef points into the source manager's memory buffer, and 95 /// is safe to store across calls to Lex(). getString()96 StringRef getString() const { return Str; } 97 98 // FIXME: Don't compute this in advance, it makes every token larger, and is 99 // also not generally what we want (it is nicer for recovery etc. to lex 123br 100 // as a single token, then diagnose as an invalid number). getIntVal()101 int64_t getIntVal() const { 102 assert(Kind == Integer && "This token isn't an integer!"); 103 return IntVal; 104 } 105 106 /// getRegVal - Get the register number for the current token, which should 107 /// be a register. getRegVal()108 unsigned getRegVal() const { 109 assert(Kind == Register && "This token isn't a register!"); 110 return static_cast<unsigned>(IntVal); 111 } 112 }; 113 114 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific 115 /// assembly lexers. 116 class MCAsmLexer { 117 /// The current token, stored in the base class for faster access. 118 AsmToken CurTok; 119 120 /// The location and description of the current error 121 SMLoc ErrLoc; 122 std::string Err; 123 124 MCAsmLexer(const MCAsmLexer &); // DO NOT IMPLEMENT 125 void operator=(const MCAsmLexer &); // DO NOT IMPLEMENT 126 protected: // Can only create subclasses. 127 const char *TokStart; 128 129 MCAsmLexer(); 130 131 virtual AsmToken LexToken() = 0; 132 SetError(const SMLoc & errLoc,const std::string & err)133 void SetError(const SMLoc &errLoc, const std::string &err) { 134 ErrLoc = errLoc; 135 Err = err; 136 } 137 138 public: 139 virtual ~MCAsmLexer(); 140 141 /// Lex - Consume the next token from the input stream and return it. 142 /// 143 /// The lexer will continuosly return the end-of-file token once the end of 144 /// the main input file has been reached. Lex()145 const AsmToken &Lex() { 146 return CurTok = LexToken(); 147 } 148 149 virtual StringRef LexUntilEndOfStatement() = 0; 150 151 /// getLoc - Get the current source location. 152 SMLoc getLoc() const; 153 154 /// getTok - Get the current (last) lexed token. getTok()155 const AsmToken &getTok() { 156 return CurTok; 157 } 158 159 /// getErrLoc - Get the current error location getErrLoc()160 const SMLoc &getErrLoc() { 161 return ErrLoc; 162 } 163 164 /// getErr - Get the current error string getErr()165 const std::string &getErr() { 166 return Err; 167 } 168 169 /// getKind - Get the kind of current token. getKind()170 AsmToken::TokenKind getKind() const { return CurTok.getKind(); } 171 172 /// is - Check if the current token has kind \arg K. is(AsmToken::TokenKind K)173 bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } 174 175 /// isNot - Check if the current token has kind \arg K. isNot(AsmToken::TokenKind K)176 bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } 177 }; 178 179 } // End llvm namespace 180 181 #endif 182