1 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 10 #define LLVM_MC_MCPARSER_MCASMLEXER_H 11 12 #include "llvm/ADT/ArrayRef.h" 13 #include "llvm/ADT/SmallVector.h" 14 #include "llvm/MC/MCAsmMacro.h" 15 #include <algorithm> 16 #include <cassert> 17 #include <cstddef> 18 #include <cstdint> 19 #include <string> 20 21 namespace llvm { 22 23 /// A callback class which is notified of each comment in an assembly file as 24 /// it is lexed. 25 class AsmCommentConsumer { 26 public: 27 virtual ~AsmCommentConsumer() = default; 28 29 /// Callback function for when a comment is lexed. Loc is the start of the 30 /// comment text (excluding the comment-start marker). CommentText is the text 31 /// of the comment, excluding the comment start and end markers, and the 32 /// newline for single-line comments. 33 virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0; 34 }; 35 36 37 /// Generic assembler lexer interface, for use by target specific assembly 38 /// lexers. 39 class MCAsmLexer { 40 /// The current token, stored in the base class for faster access. 41 SmallVector<AsmToken, 1> CurTok; 42 43 /// The location and description of the current error 44 SMLoc ErrLoc; 45 std::string Err; 46 47 protected: // Can only create subclasses. 48 const char *TokStart = nullptr; 49 bool SkipSpace = true; 50 bool AllowAtInIdentifier; 51 bool IsAtStartOfStatement = true; 52 bool LexMasmIntegers = false; 53 AsmCommentConsumer *CommentConsumer = nullptr; 54 55 MCAsmLexer(); 56 57 virtual AsmToken LexToken() = 0; 58 SetError(SMLoc errLoc,const std::string & err)59 void SetError(SMLoc errLoc, const std::string &err) { 60 ErrLoc = errLoc; 61 Err = err; 62 } 63 64 public: 65 MCAsmLexer(const MCAsmLexer &) = delete; 66 MCAsmLexer &operator=(const MCAsmLexer &) = delete; 67 virtual ~MCAsmLexer(); 68 69 /// Consume the next token from the input stream and return it. 70 /// 71 /// The lexer will continuously return the end-of-file token once the end of 72 /// the main input file has been reached. Lex()73 const AsmToken &Lex() { 74 assert(!CurTok.empty()); 75 // Mark if we parsing out a EndOfStatement. 76 IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement; 77 CurTok.erase(CurTok.begin()); 78 // LexToken may generate multiple tokens via UnLex but will always return 79 // the first one. Place returned value at head of CurTok vector. 80 if (CurTok.empty()) { 81 AsmToken T = LexToken(); 82 CurTok.insert(CurTok.begin(), T); 83 } 84 return CurTok.front(); 85 } 86 UnLex(AsmToken const & Token)87 void UnLex(AsmToken const &Token) { 88 IsAtStartOfStatement = false; 89 CurTok.insert(CurTok.begin(), Token); 90 } 91 isAtStartOfStatement()92 bool isAtStartOfStatement() { return IsAtStartOfStatement; } 93 94 virtual StringRef LexUntilEndOfStatement() = 0; 95 96 /// Get the current source location. 97 SMLoc getLoc() const; 98 99 /// Get the current (last) lexed token. getTok()100 const AsmToken &getTok() const { 101 return CurTok[0]; 102 } 103 104 /// Look ahead at the next token to be lexed. 105 const AsmToken peekTok(bool ShouldSkipSpace = true) { 106 AsmToken Tok; 107 108 MutableArrayRef<AsmToken> Buf(Tok); 109 size_t ReadCount = peekTokens(Buf, ShouldSkipSpace); 110 111 assert(ReadCount == 1); 112 (void)ReadCount; 113 114 return Tok; 115 } 116 117 /// Look ahead an arbitrary number of tokens. 118 virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf, 119 bool ShouldSkipSpace = true) = 0; 120 121 /// Get the current error location getErrLoc()122 SMLoc getErrLoc() { 123 return ErrLoc; 124 } 125 126 /// Get the current error string getErr()127 const std::string &getErr() { 128 return Err; 129 } 130 131 /// Get the kind of current token. getKind()132 AsmToken::TokenKind getKind() const { return getTok().getKind(); } 133 134 /// Check if the current token has kind \p K. is(AsmToken::TokenKind K)135 bool is(AsmToken::TokenKind K) const { return getTok().is(K); } 136 137 /// Check if the current token has kind \p K. isNot(AsmToken::TokenKind K)138 bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); } 139 140 /// Set whether spaces should be ignored by the lexer setSkipSpace(bool val)141 void setSkipSpace(bool val) { SkipSpace = val; } 142 getAllowAtInIdentifier()143 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } setAllowAtInIdentifier(bool v)144 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } 145 setCommentConsumer(AsmCommentConsumer * CommentConsumer)146 void setCommentConsumer(AsmCommentConsumer *CommentConsumer) { 147 this->CommentConsumer = CommentConsumer; 148 } 149 150 /// Set whether to lex masm-style binary and hex literals. They look like 151 /// 0b1101 and 0ABCh respectively. setLexMasmIntegers(bool V)152 void setLexMasmIntegers(bool V) { LexMasmIntegers = V; } 153 }; 154 155 } // end namespace llvm 156 157 #endif // LLVM_MC_MCPARSER_MCASMLEXER_H 158