• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
12 
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <string>
17 
18 namespace llvm {
19 
20 /// AsmToken - Target independent representation for an assembler token.
21 class AsmToken {
22 public:
23   enum TokenKind {
24     // Markers
25     Eof, Error,
26 
27     // String values.
28     Identifier,
29     String,
30 
31     // Integer values.
32     Integer,
33 
34     // Real values.
35     Real,
36 
37     // No-value.
38     EndOfStatement,
39     Colon,
40     Space,
41     Plus, Minus, Tilde,
42     Slash,    // '/'
43     BackSlash, // '\'
44     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
45     Star, Dot, Comma, Dollar, Equal, EqualEqual,
46 
47     Pipe, PipePipe, Caret,
48     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
49     Less, LessEqual, LessLess, LessGreater,
50     Greater, GreaterEqual, GreaterGreater, At
51   };
52 
53 private:
54   TokenKind Kind;
55 
56   /// A reference to the entire token contents; this is always a pointer into
57   /// a memory buffer owned by the source manager.
58   StringRef Str;
59 
60   int64_t IntVal;
61 
62 public:
AsmToken()63   AsmToken() {}
64   AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
Kind(_Kind)65     : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
66 
getKind()67   TokenKind getKind() const { return Kind; }
is(TokenKind K)68   bool is(TokenKind K) const { return Kind == K; }
isNot(TokenKind K)69   bool isNot(TokenKind K) const { return Kind != K; }
70 
71   SMLoc getLoc() const;
72   SMLoc getEndLoc() const;
73 
74   /// getStringContents - Get the contents of a string token (without quotes).
getStringContents()75   StringRef getStringContents() const {
76     assert(Kind == String && "This token isn't a string!");
77     return Str.slice(1, Str.size() - 1);
78   }
79 
80   /// getIdentifier - Get the identifier string for the current token, which
81   /// should be an identifier or a string. This gets the portion of the string
82   /// which should be used as the identifier, e.g., it does not include the
83   /// quotes on strings.
getIdentifier()84   StringRef getIdentifier() const {
85     if (Kind == Identifier)
86       return getString();
87     return getStringContents();
88   }
89 
90   /// getString - Get the string for the current token, this includes all
91   /// characters (for example, the quotes on strings) in the token.
92   ///
93   /// The returned StringRef points into the source manager's memory buffer, and
94   /// is safe to store across calls to Lex().
getString()95   StringRef getString() const { return Str; }
96 
97   // FIXME: Don't compute this in advance, it makes every token larger, and is
98   // also not generally what we want (it is nicer for recovery etc. to lex 123br
99   // as a single token, then diagnose as an invalid number).
getIntVal()100   int64_t getIntVal() const {
101     assert(Kind == Integer && "This token isn't an integer!");
102     return IntVal;
103   }
104 };
105 
106 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific
107 /// assembly lexers.
108 class MCAsmLexer {
109   /// The current token, stored in the base class for faster access.
110   AsmToken CurTok;
111 
112   /// The location and description of the current error
113   SMLoc ErrLoc;
114   std::string Err;
115 
116   MCAsmLexer(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
117   void operator=(const MCAsmLexer &) LLVM_DELETED_FUNCTION;
118 protected: // Can only create subclasses.
119   const char *TokStart;
120   bool SkipSpace;
121 
122   MCAsmLexer();
123 
124   virtual AsmToken LexToken() = 0;
125 
SetError(const SMLoc & errLoc,const std::string & err)126   void SetError(const SMLoc &errLoc, const std::string &err) {
127     ErrLoc = errLoc;
128     Err = err;
129   }
130 
131 public:
132   virtual ~MCAsmLexer();
133 
134   /// Lex - Consume the next token from the input stream and return it.
135   ///
136   /// The lexer will continuosly return the end-of-file token once the end of
137   /// the main input file has been reached.
Lex()138   const AsmToken &Lex() {
139     return CurTok = LexToken();
140   }
141 
142   virtual StringRef LexUntilEndOfStatement() = 0;
143 
144   /// getLoc - Get the current source location.
145   SMLoc getLoc() const;
146 
147   /// getTok - Get the current (last) lexed token.
getTok()148   const AsmToken &getTok() {
149     return CurTok;
150   }
151 
152   /// getErrLoc - Get the current error location
getErrLoc()153   const SMLoc &getErrLoc() {
154     return ErrLoc;
155   }
156 
157   /// getErr - Get the current error string
getErr()158   const std::string &getErr() {
159     return Err;
160   }
161 
162   /// getKind - Get the kind of current token.
getKind()163   AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
164 
165   /// is - Check if the current token has kind \p K.
is(AsmToken::TokenKind K)166   bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
167 
168   /// isNot - Check if the current token has kind \p K.
isNot(AsmToken::TokenKind K)169   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
170 
171   /// setSkipSpace - Set whether spaces should be ignored by the lexer
setSkipSpace(bool val)172   void setSkipSpace(bool val) { SkipSpace = val; }
173 };
174 
175 } // End llvm namespace
176 
177 #endif
178