1 /** 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ASSEMBLER_LEXER_H 17 #define ASSEMBLER_LEXER_H 18 19 #include <array> 20 #include <iostream> 21 #include <string> 22 #include <string_view> 23 #include <unordered_map> 24 #include <vector> 25 26 #include "define.h" 27 #include "error.h" 28 #include "isa.h" 29 #include "utils/logger.h" 30 31 namespace panda::pandasm { 32 33 struct Token { 34 enum class Type { 35 ID_BAD = 0, 36 /* delimiters */ 37 DEL_COMMA, /* , */ 38 DEL_COLON, /* : */ 39 DEL_BRACE_L, /* { */ 40 DEL_BRACE_R, /* } */ 41 DEL_BRACKET_L, /* ( */ 42 DEL_BRACKET_R, /* ) */ 43 DEL_SQUARE_BRACKET_L, /* [ */ 44 DEL_SQUARE_BRACKET_R, /* ] */ 45 DEL_GT, /* > */ 46 DEL_LT, /* < */ 47 DEL_EQ, /* = */ 48 DEL_DOT, /* . */ 49 ID, /* other */ 50 ID_STRING, /* string literal */ 51 OPERATION, /* special */ 52 #define OPLIST(inst_code, name, optype, width, flags, dst_idx, src_idxs) ID_OP_##inst_code, /* command type list */ 53 PANDA_INSTRUCTION_LIST(OPLIST) 54 #undef OPLIST 55 KEYWORD, /* special */ 56 #define KEYWORDS(name, inst_code) ID_##inst_code, /* keyword type List */ 57 KEYWORDS_LIST(KEYWORDS) 58 #undef KEYWORDS 59 }; 60 61 std::string whole_line; 62 size_t bound_left; /* right and left bounds of tokens */ 63 size_t bound_right; 64 Type type; 65 TokenToken66 Token() : Token(0, 0, Type::ID_BAD, "") {} 67 TokenToken68 Token(size_t b_l, size_t b_r, Type t, std::string beg_of_line) 69 : whole_line(std::move(beg_of_line)), bound_left(b_l), bound_right(b_r), type(t) 70 { 71 } 72 }; 73 74 using Tokens = std::pair<std::vector<Token>, Error>; 75 76 using TokenSet = const std::vector<std::vector<Token>>; 77 78 struct Line { 79 std::vector<Token> tokens; 80 std::string buffer; /* Raw line, as read from the file */ 81 size_t pos; /* current line position */ 82 size_t end; 83 LineLine84 explicit Line(std::string str) : buffer(std::move(str)), pos(0), end(buffer.size()) {} 85 }; 86 87 class Lexer { 88 public: 89 Lexer(); 90 ~Lexer(); 91 NO_MOVE_SEMANTIC(Lexer); 92 NO_COPY_SEMANTIC(Lexer); 93 94 /* 95 * The main function of Tokenizing, which takes a string. 96 * Returns a vector of tokens. 97 */ 98 Tokens TokenizeString(const std::string &); 99 100 private: 101 std::vector<Line> lines_; 102 Line *curr_line_; 103 Error err_; 104 105 bool Eol() const; /* End of line */ 106 bool LexString(); 107 void LexTokens(); 108 void LexPreprocess(); 109 void SkipSpace(); 110 void AnalyzeLine(); 111 Token::Type LexGetType(size_t beg, size_t end) const; 112 }; 113 114 /* 115 * Returns a string representation of a token type. 116 */ 117 std::string_view TokenTypeWhat(Token::Type t); 118 119 } // namespace panda::pandasm 120 121 #endif // ASSEMBLER_LEXER_H 122