1 /* 2 * Copyright (c) 2021-2023 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef PANDA_ASSEMBLER_LEXER_H 17 #define PANDA_ASSEMBLER_LEXER_H 18 19 #include <array> 20 #include <iostream> 21 #include <string> 22 #include <string_view> 23 #include <unordered_map> 24 #include <vector> 25 26 #include "define.h" 27 #include "error.h" 28 #include "isa.h" 29 #include "utils/logger.h" 30 31 namespace panda::pandasm { 32 33 // NOLINTBEGIN(misc-non-private-member-variables-in-classes) 34 struct Token { 35 enum class Type { 36 ID_BAD = 0, 37 /* delimiters */ 38 DEL_COMMA, /* , */ 39 DEL_COLON, /* : */ 40 DEL_BRACE_L, /* { */ 41 DEL_BRACE_R, /* } */ 42 DEL_BRACKET_L, /* ( */ 43 DEL_BRACKET_R, /* ) */ 44 DEL_SQUARE_BRACKET_L, /* [ */ 45 DEL_SQUARE_BRACKET_R, /* ] */ 46 DEL_GT, /* > */ 47 DEL_LT, /* < */ 48 DEL_EQ, /* = */ 49 DEL_DOT, /* . */ 50 ID, /* other */ 51 ID_STRING, /* string literal */ 52 OPERATION, /* special */ 53 54 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) 55 #define OPLIST(inst_code, name, optype, width, flags, dst_idx, src_idxs, prof_size) \ 56 ID_OP_##inst_code, /* command type list */ 57 PANDA_INSTRUCTION_LIST(OPLIST) 58 #undef OPLIST 59 KEYWORD, /* special */ 60 61 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) 62 #define KEYWORDS(name, inst_code) ID_##inst_code, /* keyword type List */ 63 KEYWORDS_LIST(KEYWORDS) 64 #undef KEYWORDS 65 }; 66 67 std::string wholeLine; 68 size_t boundLeft; /* right and left bounds of tokens */ 69 size_t boundRight; 70 Type type; 71 TokenToken72 Token() : Token(0, 0, Type::ID_BAD, "") {} 73 TokenToken74 Token(size_t bL, size_t bR, Type t, std::string begOfLine) 75 : wholeLine(std::move(begOfLine)), boundLeft(bL), boundRight(bR), type(t) 76 { 77 } 78 }; 79 80 using Tokens = std::pair<std::vector<Token>, Error>; 81 82 using TokenSet = const std::vector<std::vector<Token>>; 83 84 struct Line { 85 std::vector<Token> tokens; 86 std::string buffer; /* Raw line, as read from the file */ 87 size_t pos {0}; /* current line position */ 88 size_t end; 89 LineLine90 explicit Line(std::string str) : buffer(std::move(str)), end(buffer.size()) {} 91 }; 92 // NOLINTEND(misc-non-private-member-variables-in-classes) 93 94 class Lexer { 95 public: 96 PANDA_PUBLIC_API Lexer(); 97 PANDA_PUBLIC_API ~Lexer(); 98 NO_MOVE_SEMANTIC(Lexer); 99 NO_COPY_SEMANTIC(Lexer); 100 101 /* 102 * The main function of Tokenizing, which takes a string. 103 * Returns a vector of tokens. 104 */ 105 PANDA_PUBLIC_API Tokens TokenizeString(const std::string &sourceStr); 106 107 private: 108 std::vector<Line> lines_; 109 Line *currLine_ {nullptr}; 110 Error err_; 111 112 bool Eol() const; /* End of line */ 113 bool LexString(); 114 void LexTokens(); 115 void UpdateCurLinePos(); 116 void LexPreprocess(); 117 void SkipSpace(); 118 void AnalyzeLine(); 119 Token::Type LexGetType(size_t beg, size_t end) const; 120 }; 121 122 /* 123 * Returns a string representation of a token type. 124 */ 125 std::string_view TokenTypeWhat(Token::Type t); 126 127 } // namespace panda::pandasm 128 129 #endif // PANDA_ASSEMBLER_LEXER_H 130