• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef PANDA_ASSEMBLER_LEXER_H
17 #define PANDA_ASSEMBLER_LEXER_H
18 
19 #include <array>
20 #include <iostream>
21 #include <string>
22 #include <string_view>
23 #include <unordered_map>
24 #include <vector>
25 
26 #include "define.h"
27 #include "error.h"
28 #include "isa.h"
29 #include "utils/logger.h"
30 
31 namespace panda::pandasm {
32 
33 // NOLINTBEGIN(misc-non-private-member-variables-in-classes)
34 struct Token {
35     enum class Type {
36         ID_BAD = 0,
37         /* delimiters */
38         DEL_COMMA,            /* , */
39         DEL_COLON,            /* : */
40         DEL_BRACE_L,          /* { */
41         DEL_BRACE_R,          /* } */
42         DEL_BRACKET_L,        /* ( */
43         DEL_BRACKET_R,        /* ) */
44         DEL_SQUARE_BRACKET_L, /* [ */
45         DEL_SQUARE_BRACKET_R, /* ] */
46         DEL_GT,               /* > */
47         DEL_LT,               /* < */
48         DEL_EQ,               /* = */
49         DEL_DOT,              /* . */
50         ID,                   /* other */
51         ID_STRING,            /* string literal */
52         OPERATION,            /* special */
53 
54 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
55 #define OPLIST(inst_code, name, optype, width, flags, dst_idx, src_idxs, prof_size) \
56     ID_OP_##inst_code, /* command type list */
57         PANDA_INSTRUCTION_LIST(OPLIST)
58 #undef OPLIST
59             KEYWORD, /* special */
60 
61 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
62 #define KEYWORDS(name, inst_code) ID_##inst_code, /* keyword type List */
63         KEYWORDS_LIST(KEYWORDS)
64 #undef KEYWORDS
65     };
66 
67     std::string wholeLine;
68     size_t boundLeft; /* right and left bounds of tokens */
69     size_t boundRight;
70     Type type;
71 
TokenToken72     Token() : Token(0, 0, Type::ID_BAD, "") {}
73 
TokenToken74     Token(size_t bL, size_t bR, Type t, std::string begOfLine)
75         : wholeLine(std::move(begOfLine)), boundLeft(bL), boundRight(bR), type(t)
76     {
77     }
78 };
79 
80 using Tokens = std::pair<std::vector<Token>, Error>;
81 
82 using TokenSet = const std::vector<std::vector<Token>>;
83 
84 struct Line {
85     std::vector<Token> tokens;
86     std::string buffer; /* Raw line, as read from the file */
87     size_t pos {0};     /* current line position */
88     size_t end;
89 
LineLine90     explicit Line(std::string str) : buffer(std::move(str)), end(buffer.size()) {}
91 };
92 // NOLINTEND(misc-non-private-member-variables-in-classes)
93 
94 class Lexer {
95 public:
96     PANDA_PUBLIC_API Lexer();
97     PANDA_PUBLIC_API ~Lexer();
98     NO_MOVE_SEMANTIC(Lexer);
99     NO_COPY_SEMANTIC(Lexer);
100 
101     /*
102      * The main function of Tokenizing, which takes a string.
103      * Returns a vector of tokens.
104      */
105     PANDA_PUBLIC_API Tokens TokenizeString(const std::string &sourceStr);
106 
107 private:
108     std::vector<Line> lines_;
109     Line *currLine_ {nullptr};
110     Error err_;
111 
112     bool Eol() const; /* End of line */
113     bool LexString();
114     void LexTokens();
115     void UpdateCurLinePos();
116     void LexPreprocess();
117     void SkipSpace();
118     void AnalyzeLine();
119     Token::Type LexGetType(size_t beg, size_t end) const;
120 };
121 
122 /*
123  * Returns a string representation of a token type.
124  */
125 std::string_view TokenTypeWhat(Token::Type t);
126 
127 }  // namespace panda::pandasm
128 
129 #endif  // PANDA_ASSEMBLER_LEXER_H
130