/**
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ASSEMBLER_LEXER_H
#define ASSEMBLER_LEXER_H

#include <array>
#include <cstddef>
#include <iostream>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>

#include "define.h"
#include "error.h"
#include "isa.h"
#include "utils/logger.h"

namespace panda::pandasm {

33 struct Token {
34     enum class Type {
35         ID_BAD = 0,
36         /* delimiters */
37         DEL_COMMA,                                                                          /* , */
38         DEL_COLON,                                                                          /* : */
39         DEL_BRACE_L,                                                                        /* { */
40         DEL_BRACE_R,                                                                        /* } */
41         DEL_BRACKET_L,                                                                      /* ( */
42         DEL_BRACKET_R,                                                                      /* ) */
43         DEL_SQUARE_BRACKET_L,                                                               /* [ */
44         DEL_SQUARE_BRACKET_R,                                                               /* ] */
45         DEL_GT,                                                                             /* > */
46         DEL_LT,                                                                             /* < */
47         DEL_EQ,                                                                             /* = */
48         DEL_DOT,                                                                            /* . */
49         ID,                                                                                 /* other */
50         ID_STRING,                                                                          /* string literal */
51         OPERATION,                                                                          /* special */
52 #define OPLIST(inst_code, name, optype, width, flags, dst_idx, src_idxs) ID_OP_##inst_code, /* command type list */
53         PANDA_INSTRUCTION_LIST(OPLIST)
54 #undef OPLIST
55             KEYWORD,                              /* special */
56 #define KEYWORDS(name, inst_code) ID_##inst_code, /* keyword type List */
57         KEYWORDS_LIST(KEYWORDS)
58 #undef KEYWORDS
59     };
60 
61     std::string whole_line;
62     size_t bound_left; /* right and left bounds of tokens */
63     size_t bound_right;
64     Type type;
65 
TokenToken66     Token() : Token(0, 0, Type::ID_BAD, "") {}
67 
TokenToken68     Token(size_t b_l, size_t b_r, Type t, std::string beg_of_line)
69         : whole_line(std::move(beg_of_line)), bound_left(b_l), bound_right(b_r), type(t)
70     {
71     }
72 };
73 
74 using Tokens = std::pair<std::vector<Token>, Error>;
75 
76 using TokenSet = const std::vector<std::vector<Token>>;
77 
78 struct Line {
79     std::vector<Token> tokens;
80     std::string buffer; /* Raw line, as read from the file */
81     size_t pos;         /* current line position */
82     size_t end;
83 
LineLine84     explicit Line(std::string str) : buffer(std::move(str)), pos(0), end(buffer.size()) {}
85 };
86 
87 class Lexer {
88 public:
89     Lexer();
90     ~Lexer();
91     NO_MOVE_SEMANTIC(Lexer);
92     NO_COPY_SEMANTIC(Lexer);
93 
94     /*
95      * The main function of Tokenizing, which takes a string.
96      * Returns a vector of tokens.
97      */
98     Tokens TokenizeString(const std::string &);
99 
100 private:
101     std::vector<Line> lines_;
102     Line *curr_line_;
103     Error err_;
104 
105     bool Eol() const; /* End of line */
106     bool LexString();
107     void LexTokens();
108     void LexPreprocess();
109     void SkipSpace();
110     void AnalyzeLine();
111     Token::Type LexGetType(size_t beg, size_t end) const;
112 };
113 
114 /*
115  * Returns a string representation of a token type.
116  */
117 std::string_view TokenTypeWhat(Token::Type t);
118 
}  // namespace panda::pandasm

#endif  // ASSEMBLER_LEXER_H