• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCASMLEXER_H
11 #define LLVM_MC_MCASMLEXER_H
12 
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/DataTypes.h"
15 #include "llvm/Support/SMLoc.h"
16 
17 namespace llvm {
18 
19 /// AsmToken - Target independent representation for an assembler token.
20 class AsmToken {
21 public:
22   enum TokenKind {
23     // Markers
24     Eof, Error,
25 
26     // String values.
27     Identifier,
28     String,
29 
30     // Integer values.
31     Integer,
32 
33     // Real values.
34     Real,
35 
36     // Register values (stored in IntVal).  Only used by MCTargetAsmLexer.
37     Register,
38 
39     // No-value.
40     EndOfStatement,
41     Colon,
42     Plus, Minus, Tilde,
43     Slash,    // '/'
44     BackSlash, // '\'
45     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
46     Star, Dot, Comma, Dollar, Equal, EqualEqual,
47 
48     Pipe, PipePipe, Caret,
49     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
50     Less, LessEqual, LessLess, LessGreater,
51     Greater, GreaterEqual, GreaterGreater, At
52   };
53 
54 private:
55   TokenKind Kind;
56 
57   /// A reference to the entire token contents; this is always a pointer into
58   /// a memory buffer owned by the source manager.
59   StringRef Str;
60 
61   int64_t IntVal;
62 
63 public:
AsmToken()64   AsmToken() {}
65   AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0)
Kind(_Kind)66     : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
67 
getKind()68   TokenKind getKind() const { return Kind; }
is(TokenKind K)69   bool is(TokenKind K) const { return Kind == K; }
isNot(TokenKind K)70   bool isNot(TokenKind K) const { return Kind != K; }
71 
72   SMLoc getLoc() const;
73   SMLoc getEndLoc() const;
74 
75   /// getStringContents - Get the contents of a string token (without quotes).
getStringContents()76   StringRef getStringContents() const {
77     assert(Kind == String && "This token isn't a string!");
78     return Str.slice(1, Str.size() - 1);
79   }
80 
81   /// getIdentifier - Get the identifier string for the current token, which
82   /// should be an identifier or a string. This gets the portion of the string
83   /// which should be used as the identifier, e.g., it does not include the
84   /// quotes on strings.
getIdentifier()85   StringRef getIdentifier() const {
86     if (Kind == Identifier)
87       return getString();
88     return getStringContents();
89   }
90 
91   /// getString - Get the string for the current token, this includes all
92   /// characters (for example, the quotes on strings) in the token.
93   ///
94   /// The returned StringRef points into the source manager's memory buffer, and
95   /// is safe to store across calls to Lex().
getString()96   StringRef getString() const { return Str; }
97 
98   // FIXME: Don't compute this in advance, it makes every token larger, and is
99   // also not generally what we want (it is nicer for recovery etc. to lex 123br
100   // as a single token, then diagnose as an invalid number).
getIntVal()101   int64_t getIntVal() const {
102     assert(Kind == Integer && "This token isn't an integer!");
103     return IntVal;
104   }
105 
106   /// getRegVal - Get the register number for the current token, which should
107   /// be a register.
getRegVal()108   unsigned getRegVal() const {
109     assert(Kind == Register && "This token isn't a register!");
110     return static_cast<unsigned>(IntVal);
111   }
112 };
113 
114 /// MCAsmLexer - Generic assembler lexer interface, for use by target specific
115 /// assembly lexers.
116 class MCAsmLexer {
117   /// The current token, stored in the base class for faster access.
118   AsmToken CurTok;
119 
120   /// The location and description of the current error
121   SMLoc ErrLoc;
122   std::string Err;
123 
124   MCAsmLexer(const MCAsmLexer &);   // DO NOT IMPLEMENT
125   void operator=(const MCAsmLexer &);  // DO NOT IMPLEMENT
126 protected: // Can only create subclasses.
127   const char *TokStart;
128 
129   MCAsmLexer();
130 
131   virtual AsmToken LexToken() = 0;
132 
SetError(const SMLoc & errLoc,const std::string & err)133   void SetError(const SMLoc &errLoc, const std::string &err) {
134     ErrLoc = errLoc;
135     Err = err;
136   }
137 
138 public:
139   virtual ~MCAsmLexer();
140 
141   /// Lex - Consume the next token from the input stream and return it.
142   ///
143   /// The lexer will continuosly return the end-of-file token once the end of
144   /// the main input file has been reached.
Lex()145   const AsmToken &Lex() {
146     return CurTok = LexToken();
147   }
148 
149   virtual StringRef LexUntilEndOfStatement() = 0;
150 
151   /// getLoc - Get the current source location.
152   SMLoc getLoc() const;
153 
154   /// getTok - Get the current (last) lexed token.
getTok()155   const AsmToken &getTok() {
156     return CurTok;
157   }
158 
159   /// getErrLoc - Get the current error location
getErrLoc()160   const SMLoc &getErrLoc() {
161     return ErrLoc;
162   }
163 
164   /// getErr - Get the current error string
getErr()165   const std::string &getErr() {
166     return Err;
167   }
168 
169   /// getKind - Get the kind of current token.
getKind()170   AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
171 
172   /// is - Check if the current token has kind \arg K.
is(AsmToken::TokenKind K)173   bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
174 
175   /// isNot - Check if the current token has kind \arg K.
isNot(AsmToken::TokenKind K)176   bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
177 };
178 
179 } // End llvm namespace
180 
181 #endif
182