/*
 * Copyright (c) 2021 Huawei Device Co., Ltd.
 *
 * HDF is dual licensed: you can use it either under the terms of
 * the GPL, or the BSD license, at your option.
 * See the LICENSE file in the root of this repository for complete details.
 */

#include "parser/lexer.h"
#include "util/string_builder.h"

namespace OHOS {
namespace HDI {
Lexer::Lexer()
{
    InitializeKeywords();
}

Lexer::~Lexer()
{
    if (currentFile_ != nullptr) {
        currentFile_->Close();
    }
}

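// Populate the keyword, delimiter, and token-dump tables used by the lexer.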
void Lexer::InitializeKeywords()
{
    keywords_ = {
        { "boolean", Token::BOOLEAN }, { "byte", Token::BYTE }, { "callback", Token::CALLBACK },
        { "char", Token::CHAR }, { "double", Token::DOUBLE }, { "enum", Token::ENUM },
        { "extends", Token::EXTENDS }, { "float", Token::FLOAT }, { "full", Token::FULL },
        { "import", Token::IMPORT }, { "in", Token::IN }, { "int", Token::INTEGER }, { "interface", Token::INTERFACE },
        { "List", Token::LIST }, { "lite", Token::LITE }, { "long", Token::LONG },
        { "Map", Token::MAP }, { "oneway", Token::ONEWAY }, { "out", Token::OUT },
        { "package", Token::PACKAGE }, { "sequenceable", Token::SEQUENCEABLE }, { "short", Token::SHORT },
        { "String", Token::STRING }, { "struct", Token::STRUCT }, { "union", Token::UNION },
        { "unsigned", Token::UNSIGNED }, { "FileDescriptor", Token::FILEDESCRIPTOR },
        { "SharedMemQueue", Token::SMEMQUEUE },
    };

    delimiters_ = {
        { '<', Token::ANGLE_BRACKETS_LEFT }, { '>', Token::ANGLE_BRACKETS_RIGHT },
        { '{', Token::BRACES_LEFT }, { '}', Token::BRACES_RIGHT },
        { '[', Token::BRACKETS_LEFT }, { ']', Token::BRACKETS_RIGHT },
        { ',', Token::COMMA }, { '(', Token::PARENTHESES_LEFT },
        { ')', Token::PARENTHESES_RIGHT }, { '.', Token::DOT },
        { ':', Token::COLON }, { ';', Token::SEMICOLON },
        { '=', Token::ASSIGN }
    };

    tokenDumps_ = {
        { Token::ANGLE_BRACKETS_LEFT, "<" }, { Token::ANGLE_BRACKETS_RIGHT, ">" }, { Token::BOOLEAN, "boolean" },
        { Token::BRACES_LEFT, "{" }, { Token::BRACES_RIGHT, "}" }, { Token::BRACKETS_LEFT, "[" },
        { Token::BRACKETS_RIGHT, "]" }, { Token::BYTE, "byte" }, { Token::CALLBACK, "callback" },
        { Token::CHAR, "char" }, { Token::COLON, ":" }, { Token::COMMA, "," },
        { Token::DOT, "." }, { Token::DOUBLE, "double" }, { Token::END_OF_FILE, "eof" },
        { Token::ENUM, "enum" }, { Token::EXTENDS, "extends" }, { Token::FLOAT, "float" },
        { Token::FULL, "full" }, { Token::IMPORT, "import" }, { Token::IN, "in" },
        { Token::INTEGER, "int" }, { Token::INTERFACE, "interface" }, { Token::LITE, "lite" },
        { Token::LIST, "List" }, { Token::LONG, "long" }, { Token::MAP, "Map" },
        { Token::ONEWAY, "oneway" }, { Token::OUT, "out" }, { Token::PACKAGE, "package" },
        { Token::SEQUENCEABLE, "sequenceable" }, { Token::STRUCT, "struct" }, { Token::PARENTHESES_LEFT, "(" },
        { Token::PARENTHESES_RIGHT, ")" }, { Token::SEMICOLON, ";" }, { Token::SHORT, "short" },
        { Token::STRING, "String" }, { Token::UNION, "union" }, { Token::UNSIGNED, "unsigned" },
        { Token::FILEDESCRIPTOR, "FileDescriptor" }, { Token::SMEMQUEUE, "SharedMemQueue" },
    };
}

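// Open the IDL source file to be tokenized; returns false if the file cannot be read.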
bool Lexer::OpenSourceFile(const String& filePath)
{
    currentFile_ = std::make_unique<File>(filePath, int(File::READ));
    if (!currentFile_->IsValid()) {
        return false;
    }

    return true;
}

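// GetToken consumes the current token; PeekToken reads it without consuming it.
// The single-token lookahead is tracked by havePeek_: a peeked token is handed back
// by the next GetToken call instead of reading a new one from the file.
//
// A typical caller (sketch only, not taken from the parser sources) drives the lexer like:
//     while (lexer.PeekToken() != Token::END_OF_FILE) {
//         Token t = lexer.GetToken();
//         // ... dispatch on t ...
//     }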
Token Lexer::GetToken(bool skipComment)
{
    if (!havePeek_) {
        currentToken_ = ReadToken(skipComment);
    }
    havePeek_ = false;
    return currentToken_;
}

Token Lexer::PeekToken(bool skipComment)
{
    if (!havePeek_) {
        currentToken_ = ReadToken(skipComment);
        havePeek_ = true;
    }
    return currentToken_;
}

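// Read the next token from the source file: skip whitespace, then dispatch on the
// first character to identifiers/keywords, decimal digits, delimiters, or comments.
// Comments are skipped when skipComment is true; any other character yields Token::UNKNOWN.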
Token Lexer::ReadToken(bool skipComment)
{
    while (!currentFile_->IsEof()) {
        char c = currentFile_->GetChar();
        tokenLineNo_ = currentFile_->GetCharLineNumber();
        tokenColumnNo_ = currentFile_->GetCharColumnNumber();
        if (IsSpace(c)) {
            continue;
        } else if (IsAlphabet(c) || c == '_') {
            Token t = ReadIdentifier(c);
            return t;
        } else if (IsDecimalDigital(c)) {
            Token t = ReadDecimalDigital(c);
            return t;
        }

        auto iter = delimiters_.find(c);
        if (iter != delimiters_.end()) {
            return iter->second;
        }

        if (c == '/') {
            if (currentFile_->PeekChar() == '/') {
                ReadLineComment(c);
            } else if (currentFile_->PeekChar() == '*') {
                ReadBlockComment(c);
            } else {
                currentToken_ = Token::UNKNOWN;
                return currentToken_;
            }

            if (skipComment) {
                continue;
            }

            return currentToken_;
        }

        currentToken_ = Token::UNKNOWN;
        return currentToken_;
    }
    currentToken_ = Token::END_OF_FILE;
    return currentToken_;
}

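// Read an identifier or keyword starting with the already-consumed character c.
// '.' is accepted inside the identifier so that fully qualified names such as
// package paths come back as a single IDENTIFIER token.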
Token Lexer::ReadIdentifier(char c)
{
    StringBuilder sb;

    sb.Append(c);
    while (!currentFile_->IsEof()) {
        c = currentFile_->PeekChar();
        if (IsAlphabet(c) || c == '_' || IsDecimalDigital(c) || c == '.') {
            c = currentFile_->GetChar();
            sb.Append(c);
            continue;
        }
        if (IsSpace(c)) {
            currentFile_->GetChar();
        }
        break;
    }
    String key = sb.ToString();
    auto it = keywords_.find(key);
    if (it == keywords_.end()) {
        identifier_ = key;
        currentToken_ = Token::IDENTIFIER;
    } else {
        currentToken_ = it->second;
    }

    return currentToken_;
}

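// Read a decimal integer literal starting with the already-consumed digit c and
// store its text in digit_.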
Token Lexer::ReadDecimalDigital(char c)
{
    StringBuilder sb;
    sb.Append(c);
    while (!currentFile_->IsEof()) {
        c = currentFile_->PeekChar();
        if (IsDecimalDigital(c)) {
            c = currentFile_->GetChar();
            sb.Append(c);
            continue;
        } else {
            break;
        }
    }
    digit_ = sb.ToString();
    currentToken_ = Token::DIGIT;
    return currentToken_;
}

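// Read a '//' line comment (the leading '/' is already in c) up to, but not
// including, the terminating newline, and store it in comment_.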
Token Lexer::ReadLineComment(char c)
{
    StringBuilder sb;

    sb.Append(c);
    while (!currentFile_->IsEof()) {
        c = currentFile_->GetChar();
        if (c == '\n') {
            break;
        }
        sb.Append(c);
    }
    comment_ = sb.ToString();
    currentToken_ = Token::COMMENT_LINE;
    return currentToken_;
}

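// Read a '/* ... */' block comment (the leading '/' is already in c), including the
// closing '*/', and store it in comment_.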
Token Lexer::ReadBlockComment(char c)
{
    StringBuilder sb;

    sb.Append(c);
    while (!currentFile_->IsEof()) {
        c = currentFile_->GetChar();
        sb.Append(c);
        if (c == '*' && currentFile_->PeekChar() == '/') {
            c = currentFile_->GetChar();
            sb.Append(c);
            break;
        }
    }
    comment_ = sb.ToString();
    currentToken_ = Token::COMMENT_BLOCK;
    return currentToken_;
}

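// Skip the remainder of the current line of the source file.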
void Lexer::SkipCurrentLine()
{
    while (!currentFile_->IsEof()) {
        char c = currentFile_->GetChar();
        if (c == '\n') {
            currentFile_->GetChar();
            return;
        }
    }
}

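// Skip characters until untilChar is found on the current line. Returns true if it is
// found (or end of file is reached), and false if the line ends first.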
bool Lexer::SkipCurrentLine(char untilChar)
{
    while (!currentFile_->IsEof()) {
        int c = currentFile_->GetChar();
        if (c == untilChar) {
            return true;
        }
        if (c == '\n') {
            currentFile_->GetChar();
            return false;
        }
    }
    return true;
}

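// Skip characters, possibly across lines, until untilChar has been consumed or the
// end of file is reached.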
void Lexer::Skip(char untilChar)
{
    while (!currentFile_->IsEof()) {
        int c = currentFile_->GetChar();
        if (c == untilChar) {
            return;
        }
    }
}

void Lexer::SkipEof()
{
    while (!currentFile_->IsEof()) {}
}

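// Map a delimiter token back to its character form; returns -1 for tokens that are
// not single-character delimiters.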
int Lexer::TokenToChar(Token token)
{
    switch (token) {
        case Token::ANGLE_BRACKETS_LEFT:
            return '<';
        case Token::ANGLE_BRACKETS_RIGHT:
            return '>';
        case Token::BRACES_LEFT:
            return '{';
        case Token::BRACES_RIGHT:
            return '}';
        case Token::BRACKETS_LEFT:
            return '[';
        case Token::BRACKETS_RIGHT:
            return ']';
        case Token::COMMA:
            return ',';
        case Token::DOT:
            return '.';
        case Token::PARENTHESES_LEFT:
            return '(';
        case Token::PARENTHESES_RIGHT:
            return ')';
        case Token::COLON:
            return ':';
        case Token::SEMICOLON:
            return ';';
        default:
            return -1;
    }
}

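// Return a printable form of the current token: its fixed spelling from tokenDumps_,
// the comment or identifier text for those token kinds, or "unknown token".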
String Lexer::DumpToken() const
{
    auto iter = tokenDumps_.find(currentToken_);
    if (iter != tokenDumps_.end()) {
        return iter->second;
    }

    if (currentToken_ == Token::COMMENT_BLOCK || currentToken_ == Token::COMMENT_LINE) {
        return comment_;
    }

    if (currentToken_ == Token::IDENTIFIER) {
        return identifier_;
    }

    return "unknown token";
}
} // namespace HDI
} // namespace OHOS