• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
12 
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/Support/Compiler.h"
16 #include "llvm/Support/DataTypes.h"
17 #include "llvm/Support/SMLoc.h"
18 
19 namespace llvm {
20 
21 /// AsmToken - Target independent representation for an assembler token.
22 class AsmToken {
23 public:
24   enum TokenKind {
25     // Markers
26     Eof, Error,
27 
28     // String values.
29     Identifier,
30     String,
31 
32     // Integer values.
33     Integer,
34     BigNum, // larger than 64 bits
35 
36     // Real values.
37     Real,
38 
39     // No-value.
40     EndOfStatement,
41     Colon,
42     Space,
43     Plus, Minus, Tilde,
44     Slash,    // '/'
45     BackSlash, // '\'
46     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
47     Star, Dot, Comma, Dollar, Equal, EqualEqual,
48 
49     Pipe, PipePipe, Caret,
50     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
51     Less, LessEqual, LessLess, LessGreater,
52     Greater, GreaterEqual, GreaterGreater, At
53   };
54 
55 private:
56   TokenKind Kind;
57 
58   /// A reference to the entire token contents; this is always a pointer into
59   /// a memory buffer owned by the source manager.
60   StringRef Str;
61 
62   APInt IntVal;
63 
64 public:
AsmToken()65   AsmToken() {}
AsmToken(TokenKind _Kind,StringRef _Str,APInt _IntVal)66   AsmToken(TokenKind _Kind, StringRef _Str, APInt _IntVal)
67     : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
68   AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
Kind(_Kind)69     : Kind(_Kind), Str(_Str), IntVal(64, _IntVal, true) {}
70 
getKind()71   TokenKind getKind() const { return Kind; }
is(TokenKind K)72   bool is(TokenKind K) const { return Kind == K; }
isNot(TokenKind K)73   bool isNot(TokenKind K) const { return Kind != K; }
74 
75   SMLoc getLoc() const;
76   SMLoc getEndLoc() const;
77 
78   /// getStringContents - Get the contents of a string token (without quotes).
getStringContents()79   StringRef getStringContents() const {
80     assert(Kind == String && "This token isn't a string!");
81     return Str.slice(1, Str.size() - 1);
82   }
83 
84   /// getIdentifier - Get the identifier string for the current token, which
85   /// should be an identifier or a string. This gets the portion of the string
86   /// which should be used as the identifier, e.g., it does not include the
87   /// quotes on strings.
getIdentifier()88   StringRef getIdentifier() const {
89     if (Kind == Identifier)
90       return getString();
91     return getStringContents();
92   }
93 
94   /// getString - Get the string for the current token, this includes all
95   /// characters (for example, the quotes on strings) in the token.
96   ///
97   /// The returned StringRef points into the source manager's memory buffer, and
98   /// is safe to store across calls to Lex().
getString()99   StringRef getString() const { return Str; }
100 
101   // FIXME: Don't compute this in advance, it makes every token larger, and is
102   // also not generally what we want (it is nicer for recovery etc. to lex 123br
103   // as a single token, then diagnose as an invalid number).
getIntVal()104   int64_t getIntVal() const {
105     assert(Kind == Integer && "This token isn't an integer!");
106     return IntVal.getZExtValue();
107   }
108 
getAPIntVal()109   APInt getAPIntVal() const {
110     assert((Kind == Integer || Kind == BigNum) &&
111            "This token isn't an integer!");
112     return IntVal;
113   }
114 };
115 
116 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific
117 /// assembly lexers.
118 class MCAsmLexer {
119   /// The current token, stored in the base class for faster access.
120   AsmToken CurTok;
121 
122   /// The location and description of the current error
123   SMLoc ErrLoc;
124   std::string Err;
125 
126   MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
127   void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
128 protected: // Can only create subclasses.
129   const char *TokStart;
130   bool SkipSpace;
131   bool AllowAtInIdentifier;
132 
133   MCAsmLexer();
134 
135   virtual AsmToken LexToken() = 0;
136 
SetError(const SMLoc & errLoc,const std::string & err)137   void SetError(const SMLoc &errLoc, const std::string &err) {
138     ErrLoc = errLoc;
139     Err = err;
140   }
141 
142 public:
143   virtual ~MCAsmLexer();
144 
145   /// Lex - Consume the next token from the input stream and return it.
146   ///
147   /// The lexer will continuosly return the end-of-file token once the end of
148   /// the main input file has been reached.
Lex()149   const AsmToken &Lex() {
150     return CurTok = LexToken();
151   }
152 
153   virtual StringRef LexUntilEndOfStatement() = 0;
154 
155   /// getLoc - Get the current source location.
156   SMLoc getLoc() const;
157 
158   /// getTok - Get the current (last) lexed token.
getTok()159   const AsmToken &getTok() {
160     return CurTok;
161   }
162 
163   /// peekTok - Look ahead at the next token to be lexed.
164   virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0;
165 
166   /// getErrLoc - Get the current error location
getErrLoc()167   const SMLoc &getErrLoc() {
168     return ErrLoc;
169   }
170 
171   /// getErr - Get the current error string
getErr()172   const std::string &getErr() {
173     return Err;
174   }
175 
176   /// getKind - Get the kind of current token.
getKind()177   AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
178 
179   /// is - Check if the current token has kind \p K.
is(AsmToken::TokenKind K)180   bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
181 
182   /// isNot - Check if the current token has kind \p K.
isNot(AsmToken::TokenKind K)183   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
184 
185   /// setSkipSpace - Set whether spaces should be ignored by the lexer
setSkipSpace(bool val)186   void setSkipSpace(bool val) { SkipSpace = val; }
187 
getAllowAtInIdentifier()188   bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
setAllowAtInIdentifier(bool v)189   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
190 };
191 
192 } // End llvm namespace
193 
194 #endif
195