• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCASMLEXER_H
11 #define LLVM_MC_MCASMLEXER_H
12 
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/DataTypes.h"
15 #include "llvm/Support/SMLoc.h"
16 
17 namespace llvm {
18 class MCAsmLexer;
19 class MCInst;
20 class Target;
21 
22 /// AsmToken - Target independent representation for an assembler token.
23 class AsmToken {
24 public:
25   enum TokenKind {
26     // Markers
27     Eof, Error,
28 
29     // String values.
30     Identifier,
31     String,
32 
33     // Integer values.
34     Integer,
35 
36     // Real values.
37     Real,
38 
39     // Register values (stored in IntVal).  Only used by TargetAsmLexer.
40     Register,
41 
42     // No-value.
43     EndOfStatement,
44     Colon,
45     Plus, Minus, Tilde,
46     Slash,    // '/'
47     BackSlash, // '\'
48     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
49     Star, Dot, Comma, Dollar, Equal, EqualEqual,
50 
51     Pipe, PipePipe, Caret,
52     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
53     Less, LessEqual, LessLess, LessGreater,
54     Greater, GreaterEqual, GreaterGreater, At
55   };
56 
57   TokenKind Kind;
58 
59   /// A reference to the entire token contents; this is always a pointer into
60   /// a memory buffer owned by the source manager.
61   StringRef Str;
62 
63   int64_t IntVal;
64 
65 public:
AsmToken()66   AsmToken() {}
67   AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
Kind(_Kind)68     : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
69 
getKind()70   TokenKind getKind() const { return Kind; }
is(TokenKind K)71   bool is(TokenKind K) const { return Kind == K; }
isNot(TokenKind K)72   bool isNot(TokenKind K) const { return Kind != K; }
73 
74   SMLoc getLoc() const;
75 
76   /// getStringContents - Get the contents of a string token (without quotes).
getStringContents()77   StringRef getStringContents() const {
78     assert(Kind == String && "This token isn't a string!");
79     return Str.slice(1, Str.size() - 1);
80   }
81 
82   /// getIdentifier - Get the identifier string for the current token, which
83   /// should be an identifier or a string. This gets the portion of the string
84   /// which should be used as the identifier, e.g., it does not include the
85   /// quotes on strings.
getIdentifier()86   StringRef getIdentifier() const {
87     if (Kind == Identifier)
88       return getString();
89     return getStringContents();
90   }
91 
92   /// getString - Get the string for the current token, this includes all
93   /// characters (for example, the quotes on strings) in the token.
94   ///
95   /// The returned StringRef points into the source manager's memory buffer, and
96   /// is safe to store across calls to Lex().
getString()97   StringRef getString() const { return Str; }
98 
99   // FIXME: Don't compute this in advance, it makes every token larger, and is
100   // also not generally what we want (it is nicer for recovery etc. to lex 123br
101   // as a single token, then diagnose as an invalid number).
getIntVal()102   int64_t getIntVal() const {
103     assert(Kind == Integer && "This token isn't an integer!");
104     return IntVal;
105   }
106 
107   /// getRegVal - Get the register number for the current token, which should
108   /// be a register.
getRegVal()109   unsigned getRegVal() const {
110     assert(Kind == Register && "This token isn't a register!");
111     return static_cast<unsigned>(IntVal);
112   }
113 };
114 
115 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific
116 /// assembly lexers.
117 class MCAsmLexer {
118   /// The current token, stored in the base class for faster access.
119   AsmToken CurTok;
120 
121   /// The location and description of the current error
122   SMLoc ErrLoc;
123   std::string Err;
124 
125   MCAsmLexer(const MCAsmLexer &);   // DO NOT IMPLEMENT
126   void operator=(const MCAsmLexer &);  // DO NOT IMPLEMENT
127 protected: // Can only create subclasses.
128   const char *TokStart;
129 
130   MCAsmLexer();
131 
132   virtual AsmToken LexToken() = 0;
133 
SetError(const SMLoc & errLoc,const std::string & err)134   void SetError(const SMLoc &errLoc, const std::string &err) {
135     ErrLoc = errLoc;
136     Err = err;
137   }
138 
139 public:
140   virtual ~MCAsmLexer();
141 
142   /// Lex - Consume the next token from the input stream and return it.
143   ///
144   /// The lexer will continuosly return the end-of-file token once the end of
145   /// the main input file has been reached.
Lex()146   const AsmToken &Lex() {
147     return CurTok = LexToken();
148   }
149 
150   virtual StringRef LexUntilEndOfStatement() = 0;
151 
152   /// getLoc - Get the current source location.
153   SMLoc getLoc() const;
154 
155   /// getTok - Get the current (last) lexed token.
getTok()156   const AsmToken &getTok() {
157     return CurTok;
158   }
159 
160   /// getErrLoc - Get the current error location
getErrLoc()161   const SMLoc &getErrLoc() {
162     return ErrLoc;
163   }
164 
165   /// getErr - Get the current error string
getErr()166   const std::string &getErr() {
167     return Err;
168   }
169 
170   /// getKind - Get the kind of current token.
getKind()171   AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
172 
173   /// is - Check if the current token has kind \arg K.
is(AsmToken::TokenKind K)174   bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
175 
176   /// isNot - Check if the current token has kind \arg K.
isNot(AsmToken::TokenKind K)177   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
178 };
179 
180 } // End llvm namespace
181 
182 #endif
183