1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 *
4 * HDF is dual licensed: you can use it either under the terms of
5 * the GPL, or the BSD license, at your option.
6 * See the LICENSE file in the root of this repository for complete details.
7 */
8
9 #include "lexer/lexer.h"
10 #include "util/logger.h"
11 #include "util/string_builder.h"
12 #include "util/string_helper.h"
13
14 namespace OHOS {
15 namespace HDI {
16 Lexer::StrTokenTypeMap Lexer::keyWords_ = {
17 {"boolean", TokenType::BOOLEAN },
18 {"byte", TokenType::BYTE },
19 {"short", TokenType::SHORT },
20 {"int", TokenType::INT },
21 {"long", TokenType::LONG },
22 {"String", TokenType::STRING },
23 {"float", TokenType::FLOAT },
24 {"double", TokenType::DOUBLE },
25 {"FileDescriptor", TokenType::FD },
26 {"Ashmem", TokenType::ASHMEM },
27 {"BufferHandle", TokenType::BUFFER_HANDLE},
28 {"List", TokenType::LIST },
29 {"Map", TokenType::MAP },
30 {"SharedMemQueue", TokenType::SMQ },
31 {"char", TokenType::CHAR },
32 {"unsigned", TokenType::UNSIGNED },
33 {"enum", TokenType::ENUM },
34 {"struct", TokenType::STRUCT },
35 {"union", TokenType::UNION },
36 {"package", TokenType::PACKAGE },
37 {"sequenceable", TokenType::SEQ },
38 {"import", TokenType::IMPORT },
39 {"interface", TokenType::INTERFACE },
40 {"extends", TokenType::EXTENDS },
41 {"oneway", TokenType::ONEWAY },
42 {"callback", TokenType::CALLBACK },
43 {"full", TokenType::FULL },
44 {"lite", TokenType::LITE },
45 {"in", TokenType::IN },
46 {"out", TokenType::OUT },
47 };
48
49 Lexer::StrTokenTypeMap Lexer::symbols_ = {
50 {".", TokenType::DOT },
51 {",", TokenType::COMMA },
52 {":", TokenType::COLON },
53 {"=", TokenType::ASSIGN },
54 {";", TokenType::SEMICOLON },
55 {"{", TokenType::BRACES_LEFT },
56 {"}", TokenType::BRACES_RIGHT },
57 {"[", TokenType::BRACKETS_LEFT },
58 {"]", TokenType::BRACKETS_RIGHT },
59 {"(", TokenType::PARENTHESES_LEFT },
60 {")", TokenType::PARENTHESES_RIGHT },
61 {"<", TokenType::ANGLE_BRACKETS_LEFT },
62 {">", TokenType::ANGLE_BRACKETS_RIGHT},
63 {"+", TokenType::ADD },
64 {"-", TokenType::SUB },
65 {"*", TokenType::STAR },
66 {"/", TokenType::SLASH },
67 {"%", TokenType::PERCENT_SIGN },
68 {"<<", TokenType::LEFT_SHIFT },
69 {">>", TokenType::RIGHT_SHIFT },
70 {"&", TokenType::AND },
71 {"^", TokenType::XOR },
72 {"|", TokenType::OR },
73 {"~", TokenType::TILDE },
74 {"++", TokenType::PPLUS },
75 {"--", TokenType::MMINUS },
76 };
77
Lexer()78 Lexer::Lexer() : filePath_(), file_(nullptr), mode_(ParseMode::DECL_MODE), havePeek_(false), curToken_() {}
79
Reset(const std::string & filePath)80 bool Lexer::Reset(const std::string &filePath)
81 {
82 file_ = std::make_unique<File>(filePath, int(File::READ));
83 if (file_ == nullptr || !file_->IsValid()) {
84 return false;
85 }
86
87 havePeek_ = false;
88 return true;
89 }
90
PeekToken(bool skipComment)91 Token Lexer::PeekToken(bool skipComment)
92 {
93 if (!havePeek_) {
94 ReadToken(curToken_, skipComment);
95 havePeek_ = true;
96 }
97 return curToken_;
98 }
99
GetToken(bool skipComment)100 Token Lexer::GetToken(bool skipComment)
101 {
102 if (!havePeek_) {
103 ReadToken(curToken_, skipComment);
104 }
105 havePeek_ = false;
106 return curToken_;
107 }
108
SkipCurrentLine()109 void Lexer::SkipCurrentLine()
110 {
111 while (!file_->IsEof()) {
112 char c = file_->GetChar();
113 if (c == '\n') {
114 file_->GetChar();
115 break;
116 }
117 }
118 havePeek_ = false;
119 }
120
SkipCurrentLine(char untilChar)121 bool Lexer::SkipCurrentLine(char untilChar)
122 {
123 bool ret = true;
124 while (!file_->IsEof()) {
125 int c = file_->GetChar();
126 if (c == untilChar) {
127 ret = true;
128 break;
129 }
130 if (c == '\n') {
131 file_->GetChar();
132 ret = false;
133 break;
134 }
135 }
136 havePeek_ = false;
137 return ret;
138 }
139
Skip(char untilChar)140 void Lexer::Skip(char untilChar)
141 {
142 while (!file_->IsEof()) {
143 int c = file_->GetChar();
144 if (c == untilChar) {
145 break;
146 }
147 }
148 havePeek_ = false;
149 }
150
SkipToken(TokenType tokenType)151 void Lexer::SkipToken(TokenType tokenType)
152 {
153 while (curToken_.kind_ != tokenType) {
154 GetToken(false);
155 }
156 }
157
SkipUntilToken(TokenType tokenType)158 void Lexer::SkipUntilToken(TokenType tokenType)
159 {
160 Token token = PeekToken();
161 while (token.kind_ != tokenType) {
162 GetToken(false);
163 token = PeekToken();
164 }
165 }
166
SkipEof()167 void Lexer::SkipEof()
168 {
169 while (!file_->IsEof()) {}
170 havePeek_ = false;
171 }
172
ReadToken(Token & token,bool skipComment)173 void Lexer::ReadToken(Token &token, bool skipComment)
174 {
175 if (!file_->IsEof()) {
176 InitCurToken(token);
177 }
178 while (!file_->IsEof()) {
179 char c = file_->PeekChar();
180 if (isspace(c)) {
181 file_->GetChar();
182 continue;
183 }
184 token.location_.row_ = file_->GetCharLineNumber();
185 token.location_.col_ = file_->GetCharColumnNumber();
186 if (isalpha(c) || c == '_') {
187 ReadId(token);
188 return;
189 } else if (isdigit(c)) {
190 ReadNum(token);
191 return;
192 } else if (c == '<') {
193 ReadShiftLeftOp(token);
194 return;
195 } else if (c == '>') {
196 ReadShiftRightOp(token);
197 return;
198 } else if (c == '+') {
199 ReadPPlusOp(token);
200 return;
201 } else if (c == '-') {
202 ReadMMinusOp(token);
203 return;
204 } else if (c == '/') {
205 ReadComment(token);
206 if ((token.kind_ == TokenType::COMMENT_BLOCK || token.kind_ == TokenType::COMMENT_LINE) && skipComment) {
207 InitCurToken(token);
208 continue;
209 }
210 return;
211 }
212 ReadSymbolToken(token);
213 return;
214 }
215 token.kind_ = TokenType::END_OF_FILE;
216 token.value_ = "";
217 }
218
InitCurToken(Token & token)219 void Lexer::InitCurToken(Token &token)
220 {
221 token.kind_ = TokenType::UNKNOWN;
222 token.location_.filePath_ = file_->GetPath();
223 token.location_.row_ = 0;
224 token.location_.col_ = 0;
225 token.value_ = "";
226 }
227
ReadId(Token & token)228 void Lexer::ReadId(Token &token)
229 {
230 char c = file_->GetChar();
231 StringBuilder sb;
232 sb.Append(c);
233 while (!file_->IsEof()) {
234 c = file_->PeekChar();
235 if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
236 c = file_->GetChar();
237 sb.Append(c);
238 continue;
239 }
240 if (isspace(c)) {
241 file_->GetChar();
242 }
243 break;
244 }
245
246 std::string key = sb.ToString();
247 auto it = keyWords_.find(key);
248 token.kind_ = (it == keyWords_.end()) ? TokenType::ID : it->second;
249 token.value_ = sb.ToString();
250 }
251
ReadNum(Token & token)252 void Lexer::ReadNum(Token &token)
253 {
254 char c = file_->PeekChar();
255 switch (c) {
256 case '0': {
257 file_->GetChar();
258 c = file_->PeekChar();
259 if (c == 'b' || c == 'B') {
260 // binary number
261 ReadBinaryNum(token);
262 } else if (isdigit(c)) {
263 // octal number
264 return ReadOctNum(token);
265 } else if (c == 'X' || c == 'x') {
266 // hexadecimal number
267 return ReadHexNum(token);
268 } else {
269 // decimal number 0
270 token.kind_ = TokenType::NUM;
271 token.value_ = "0";
272 }
273 break;
274 }
275 default:
276 ReadDecNum(token);
277 break;
278 }
279 }
280
ReadBinaryNum(Token & token)281 void Lexer::ReadBinaryNum(Token &token)
282 {
283 StringBuilder sb;
284 char c = file_->GetChar(); // read 'b' or 'B'
285 sb.AppendFormat("0%c", c);
286 bool err = true;
287
288 while (!file_->IsEof()) {
289 c = file_->PeekChar();
290 if (c == '0' || c == '1') {
291 sb.Append(c);
292 file_->GetChar();
293 err = false;
294 } else {
295 break;
296 }
297 }
298
299 token.kind_ = err ? TokenType::UNKNOWN : TokenType::NUM;
300 token.value_ = sb.ToString();
301 }
302
ReadOctNum(Token & token)303 void Lexer::ReadOctNum(Token &token)
304 {
305 StringBuilder sb;
306 sb.Append("0");
307 bool err = false;
308
309 while (!file_->IsEof()) {
310 char c = file_->PeekChar();
311 if (!isdigit(c)) {
312 break;
313 }
314
315 if (!(c >= '0' && c <= '7')) {
316 err = true;
317 }
318 sb.Append(c);
319 file_->GetChar();
320 }
321
322 token.kind_ = err ? TokenType::UNKNOWN : TokenType::NUM;
323 token.value_ = sb.ToString();
324 }
325
ReadHexNum(Token & token)326 void Lexer::ReadHexNum(Token &token)
327 {
328 StringBuilder sb;
329 char c = file_->GetChar(); // read 'x' or 'X'
330 sb.AppendFormat("0%c", c);
331 bool err = true;
332
333 while (!file_->IsEof()) {
334 c = file_->PeekChar();
335 if (isdigit(c) || ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
336 sb.Append(c);
337 file_->GetChar();
338 err = false;
339 } else {
340 break;
341 }
342 }
343
344 token.kind_ = err ? TokenType::UNKNOWN : TokenType::NUM;
345 token.value_ = sb.ToString();
346 }
347
ReadDecNum(Token & token)348 void Lexer::ReadDecNum(Token &token)
349 {
350 StringBuilder sb;
351 char c = file_->GetChar();
352 sb.Append(c);
353
354 while (!file_->IsEof()) {
355 c = file_->PeekChar();
356 if (!isdigit(c)) {
357 break;
358 }
359
360 sb.Append(c);
361 file_->GetChar();
362 }
363
364 token.kind_ = TokenType::NUM;
365 token.value_ = sb.ToString();
366 }
367
ReadShiftLeftOp(Token & token)368 void Lexer::ReadShiftLeftOp(Token &token)
369 {
370 char c = file_->GetChar();
371 char next = file_->PeekChar();
372 if (next == '<') {
373 file_->GetChar();
374 token.kind_ = TokenType::LEFT_SHIFT;
375 token.value_ = "<<";
376 return;
377 }
378
379 std::string symbol = StringHelper::Format("%c", c);
380 auto iter = symbols_.find(symbol);
381 token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
382 token.value_ = symbol;
383 }
384
ReadShiftRightOp(Token & token)385 void Lexer::ReadShiftRightOp(Token &token)
386 {
387 char c = file_->GetChar();
388 char next = file_->PeekChar();
389 if (next == '>' && mode_ == ParseMode::EXPR_MODE) {
390 file_->GetChar();
391 token.kind_ = TokenType::RIGHT_SHIFT;
392 token.value_ = ">>";
393 return;
394 }
395
396 std::string symbol = StringHelper::Format("%c", c);
397 auto iter = symbols_.find(symbol);
398 token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
399 token.value_ = symbol;
400 }
401
ReadPPlusOp(Token & token)402 void Lexer::ReadPPlusOp(Token &token)
403 {
404 char c = file_->GetChar();
405 char next = file_->PeekChar();
406 if (next == '+') {
407 file_->GetChar();
408 token.kind_ = TokenType::PPLUS;
409 token.value_ = "++";
410 return;
411 }
412
413 std::string symbol = StringHelper::Format("%c", c);
414 auto iter = symbols_.find(symbol);
415 token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
416 token.value_ = symbol;
417 }
418
ReadMMinusOp(Token & token)419 void Lexer::ReadMMinusOp(Token &token)
420 {
421 char c = file_->GetChar();
422 char next = file_->PeekChar();
423 if (next == '-') {
424 file_->GetChar();
425 token.kind_ = TokenType::MMINUS;
426 token.value_ = "--";
427 return;
428 }
429
430 std::string symbol = StringHelper::Format("%c", c);
431 auto iter = symbols_.find(symbol);
432 token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
433 token.value_ = symbol;
434 }
435
ReadComment(Token & token)436 void Lexer::ReadComment(Token &token)
437 {
438 char c = file_->GetChar();
439 char next = file_->PeekChar();
440 if (next == '/') {
441 ReadLineComment(token);
442 return;
443 } else if (next == '*') {
444 ReadBlockComment(token);
445 return;
446 }
447
448 std::string symbol = StringHelper::Format("%c", c);
449 auto iter = symbols_.find(symbol);
450 token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
451 token.value_ = symbol;
452 }
453
ReadLineComment(Token & token)454 void Lexer::ReadLineComment(Token &token)
455 {
456 StringBuilder sb;
457 char c = file_->GetChar();
458 sb.AppendFormat("/%c", c);
459
460 while (!file_->IsEof()) {
461 c = file_->GetChar();
462 if (c == '\n') {
463 break;
464 }
465 sb.Append(c);
466 }
467
468 token.kind_ = TokenType::COMMENT_LINE;
469 token.value_ = sb.ToString();
470 }
471
ReadBlockComment(Token & token)472 void Lexer::ReadBlockComment(Token &token)
473 {
474 StringBuilder sb;
475 char c = file_->GetChar();
476 sb.AppendFormat("/%c", c);
477
478 while (!file_->IsEof()) {
479 c = file_->GetChar();
480 sb.Append(c);
481
482 if (c == '*' && file_->PeekChar() == '/') {
483 c = file_->GetChar();
484 sb.Append(c);
485 break;
486 }
487 }
488
489 token.kind_ = TokenType::COMMENT_BLOCK;
490 token.value_ = sb.ToString();
491 }
492
ReadSymbolToken(Token & token)493 void Lexer::ReadSymbolToken(Token &token)
494 {
495 char c = file_->GetChar();
496 std::string symbol = StringHelper::Format("%c", c);
497 auto iter = symbols_.find(symbol);
498 token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
499 token.value_ = symbol;
500 }
501 } // namespace HDI
502 } // namespace OHOS