• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Huawei Device Co., Ltd.
3  *
4  * HDF is dual licensed: you can use it either under the terms of
5  * the GPL, or the BSD license, at your option.
6  * See the LICENSE file in the root of this repository for complete details.
7  */
8 
9 #include "lexer/lexer.h"
10 #include "util/logger.h"
11 #include "util/string_builder.h"
12 #include "util/string_helper.h"
13 
14 namespace OHOS {
15 namespace HDI {
16 Lexer::StrTokenTypeMap Lexer::keyWords_ = {
17     {"boolean",        TokenType::BOOLEAN      },
18     {"byte",           TokenType::BYTE         },
19     {"short",          TokenType::SHORT        },
20     {"int",            TokenType::INT          },
21     {"long",           TokenType::LONG         },
22     {"String",         TokenType::STRING       },
23     {"float",          TokenType::FLOAT        },
24     {"double",         TokenType::DOUBLE       },
25     {"FileDescriptor", TokenType::FD           },
26     {"Ashmem",         TokenType::ASHMEM       },
27     {"BufferHandle",   TokenType::BUFFER_HANDLE},
28     {"List",           TokenType::LIST         },
29     {"Map",            TokenType::MAP          },
30     {"SharedMemQueue", TokenType::SMQ          },
31     {"char",           TokenType::CHAR         },
32     {"unsigned",       TokenType::UNSIGNED     },
33     {"enum",           TokenType::ENUM         },
34     {"struct",         TokenType::STRUCT       },
35     {"union",          TokenType::UNION        },
36     {"package",        TokenType::PACKAGE      },
37     {"sequenceable",   TokenType::SEQ          },
38     {"import",         TokenType::IMPORT       },
39     {"interface",      TokenType::INTERFACE    },
40     {"extends",        TokenType::EXTENDS      },
41     {"oneway",         TokenType::ONEWAY       },
42     {"callback",       TokenType::CALLBACK     },
43     {"full",           TokenType::FULL         },
44     {"lite",           TokenType::LITE         },
45     {"in",             TokenType::IN           },
46     {"out",            TokenType::OUT          },
47 };
48 
49 Lexer::StrTokenTypeMap Lexer::symbols_ = {
50     {".",  TokenType::DOT                 },
51     {",",  TokenType::COMMA               },
52     {":",  TokenType::COLON               },
53     {"=",  TokenType::ASSIGN              },
54     {";",  TokenType::SEMICOLON           },
55     {"{",  TokenType::BRACES_LEFT         },
56     {"}",  TokenType::BRACES_RIGHT        },
57     {"[",  TokenType::BRACKETS_LEFT       },
58     {"]",  TokenType::BRACKETS_RIGHT      },
59     {"(",  TokenType::PARENTHESES_LEFT    },
60     {")",  TokenType::PARENTHESES_RIGHT   },
61     {"<",  TokenType::ANGLE_BRACKETS_LEFT },
62     {">",  TokenType::ANGLE_BRACKETS_RIGHT},
63     {"+",  TokenType::ADD                 },
64     {"-",  TokenType::SUB                 },
65     {"*",  TokenType::STAR                },
66     {"/",  TokenType::SLASH               },
67     {"%",  TokenType::PERCENT_SIGN        },
68     {"<<", TokenType::LEFT_SHIFT          },
69     {">>", TokenType::RIGHT_SHIFT         },
70     {"&",  TokenType::AND                 },
71     {"^",  TokenType::XOR                 },
72     {"|",  TokenType::OR                  },
73     {"~",  TokenType::TILDE               },
74     {"++", TokenType::PPLUS               },
75     {"--", TokenType::MMINUS              },
76 };
77 
Lexer()78 Lexer::Lexer() : filePath_(), file_(nullptr), mode_(ParseMode::DECL_MODE), havePeek_(false), curToken_() {}
79 
Reset(const std::string & filePath)80 bool Lexer::Reset(const std::string &filePath)
81 {
82     file_ = std::make_unique<File>(filePath, int(File::READ));
83     if (file_ == nullptr || !file_->IsValid()) {
84         return false;
85     }
86 
87     havePeek_ = false;
88     return true;
89 }
90 
PeekToken(bool skipComment)91 Token Lexer::PeekToken(bool skipComment)
92 {
93     if (!havePeek_) {
94         ReadToken(curToken_, skipComment);
95         havePeek_ = true;
96     }
97     return curToken_;
98 }
99 
GetToken(bool skipComment)100 Token Lexer::GetToken(bool skipComment)
101 {
102     if (!havePeek_) {
103         ReadToken(curToken_, skipComment);
104     }
105     havePeek_ = false;
106     return curToken_;
107 }
108 
SkipCurrentLine()109 void Lexer::SkipCurrentLine()
110 {
111     while (!file_->IsEof()) {
112         char c = file_->GetChar();
113         if (c == '\n') {
114             file_->GetChar();
115             break;
116         }
117     }
118     havePeek_ = false;
119 }
120 
SkipCurrentLine(char untilChar)121 bool Lexer::SkipCurrentLine(char untilChar)
122 {
123     bool ret = true;
124     while (!file_->IsEof()) {
125         int c = file_->GetChar();
126         if (c == untilChar) {
127             ret = true;
128             break;
129         }
130         if (c == '\n') {
131             file_->GetChar();
132             ret = false;
133             break;
134         }
135     }
136     havePeek_ = false;
137     return ret;
138 }
139 
Skip(char untilChar)140 void Lexer::Skip(char untilChar)
141 {
142     while (!file_->IsEof()) {
143         int c = file_->GetChar();
144         if (c == untilChar) {
145             break;
146         }
147     }
148     havePeek_ = false;
149 }
150 
SkipToken(TokenType tokenType)151 void Lexer::SkipToken(TokenType tokenType)
152 {
153     while (curToken_.kind_ != tokenType) {
154         GetToken(false);
155     }
156 }
157 
SkipUntilToken(TokenType tokenType)158 void Lexer::SkipUntilToken(TokenType tokenType)
159 {
160     Token token = PeekToken();
161     while (token.kind_ != tokenType) {
162         GetToken(false);
163         token = PeekToken();
164     }
165 }
166 
SkipEof()167 void Lexer::SkipEof()
168 {
169     while (!file_->IsEof()) {}
170     havePeek_ = false;
171 }
172 
ReadToken(Token & token,bool skipComment)173 void Lexer::ReadToken(Token &token, bool skipComment)
174 {
175     if (!file_->IsEof()) {
176         InitCurToken(token);
177     }
178     while (!file_->IsEof()) {
179         char c = file_->PeekChar();
180         if (isspace(c)) {
181             file_->GetChar();
182             continue;
183         }
184         token.location_.row_ = file_->GetCharLineNumber();
185         token.location_.col_ = file_->GetCharColumnNumber();
186         if (isalpha(c) || c == '_') {
187             ReadId(token);
188             return;
189         } else if (isdigit(c)) {
190             ReadNum(token);
191             return;
192         } else if (c == '<') {
193             ReadShiftLeftOp(token);
194             return;
195         } else if (c == '>') {
196             ReadShiftRightOp(token);
197             return;
198         } else if (c == '+') {
199             ReadPPlusOp(token);
200             return;
201         } else if (c == '-') {
202             ReadMMinusOp(token);
203             return;
204         } else if (c == '/') {
205             ReadComment(token);
206             if ((token.kind_ == TokenType::COMMENT_BLOCK || token.kind_ == TokenType::COMMENT_LINE) && skipComment) {
207                 InitCurToken(token);
208                 continue;
209             }
210             return;
211         }
212         ReadSymbolToken(token);
213         return;
214     }
215     token.kind_ = TokenType::END_OF_FILE;
216     token.value_ = "";
217 }
218 
InitCurToken(Token & token)219 void Lexer::InitCurToken(Token &token)
220 {
221     token.kind_ = TokenType::UNKNOWN;
222     token.location_.filePath_ = file_->GetPath();
223     token.location_.row_ = 0;
224     token.location_.col_ = 0;
225     token.value_ = "";
226 }
227 
ReadId(Token & token)228 void Lexer::ReadId(Token &token)
229 {
230     char c = file_->GetChar();
231     StringBuilder sb;
232     sb.Append(c);
233     while (!file_->IsEof()) {
234         c = file_->PeekChar();
235         if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
236             c = file_->GetChar();
237             sb.Append(c);
238             continue;
239         }
240         if (isspace(c)) {
241             file_->GetChar();
242         }
243         break;
244     }
245 
246     std::string key = sb.ToString();
247     auto it = keyWords_.find(key);
248     token.kind_ = (it == keyWords_.end()) ? TokenType::ID : it->second;
249     token.value_ = sb.ToString();
250 }
251 
ReadNum(Token & token)252 void Lexer::ReadNum(Token &token)
253 {
254     char c = file_->PeekChar();
255     switch (c) {
256         case '0': {
257             file_->GetChar();
258             c = file_->PeekChar();
259             if (c == 'b' || c == 'B') {
260                 // binary number
261                 ReadBinaryNum(token);
262             } else if (isdigit(c)) {
263                 // octal number
264                 return ReadOctNum(token);
265             } else if (c == 'X' || c == 'x') {
266                 // hexadecimal number
267                 return ReadHexNum(token);
268             } else {
269                 // decimal number 0
270                 token.kind_ = TokenType::NUM;
271                 token.value_ = "0";
272             }
273             break;
274         }
275         default:
276             ReadDecNum(token);
277             break;
278     }
279 }
280 
ReadBinaryNum(Token & token)281 void Lexer::ReadBinaryNum(Token &token)
282 {
283     StringBuilder sb;
284     char c = file_->GetChar(); // read 'b' or 'B'
285     sb.AppendFormat("0%c", c);
286     bool err = true;
287 
288     while (!file_->IsEof()) {
289         c = file_->PeekChar();
290         if (c == '0' || c == '1') {
291             sb.Append(c);
292             file_->GetChar();
293             err = false;
294         } else {
295             break;
296         }
297     }
298 
299     token.kind_ = err ? TokenType::UNKNOWN : TokenType::NUM;
300     token.value_ = sb.ToString();
301 }
302 
ReadOctNum(Token & token)303 void Lexer::ReadOctNum(Token &token)
304 {
305     StringBuilder sb;
306     sb.Append("0");
307     bool err = false;
308 
309     while (!file_->IsEof()) {
310         char c = file_->PeekChar();
311         if (!isdigit(c)) {
312             break;
313         }
314 
315         if (!(c >= '0' && c <= '7')) {
316             err = true;
317         }
318         sb.Append(c);
319         file_->GetChar();
320     }
321 
322     token.kind_ = err ? TokenType::UNKNOWN : TokenType::NUM;
323     token.value_ = sb.ToString();
324 }
325 
ReadHexNum(Token & token)326 void Lexer::ReadHexNum(Token &token)
327 {
328     StringBuilder sb;
329     char c = file_->GetChar(); // read 'x' or 'X'
330     sb.AppendFormat("0%c", c);
331     bool err = true;
332 
333     while (!file_->IsEof()) {
334         c = file_->PeekChar();
335         if (isdigit(c) || ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
336             sb.Append(c);
337             file_->GetChar();
338             err = false;
339         } else {
340             break;
341         }
342     }
343 
344     token.kind_ = err ? TokenType::UNKNOWN : TokenType::NUM;
345     token.value_ = sb.ToString();
346 }
347 
ReadDecNum(Token & token)348 void Lexer::ReadDecNum(Token &token)
349 {
350     StringBuilder sb;
351     char c = file_->GetChar();
352     sb.Append(c);
353 
354     while (!file_->IsEof()) {
355         c = file_->PeekChar();
356         if (!isdigit(c)) {
357             break;
358         }
359 
360         sb.Append(c);
361         file_->GetChar();
362     }
363 
364     token.kind_ = TokenType::NUM;
365     token.value_ = sb.ToString();
366 }
367 
ReadShiftLeftOp(Token & token)368 void Lexer::ReadShiftLeftOp(Token &token)
369 {
370     char c = file_->GetChar();
371     char next = file_->PeekChar();
372     if (next == '<') {
373         file_->GetChar();
374         token.kind_ = TokenType::LEFT_SHIFT;
375         token.value_ = "<<";
376         return;
377     }
378 
379     std::string symbol = StringHelper::Format("%c", c);
380     auto iter = symbols_.find(symbol);
381     token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
382     token.value_ = symbol;
383 }
384 
ReadShiftRightOp(Token & token)385 void Lexer::ReadShiftRightOp(Token &token)
386 {
387     char c = file_->GetChar();
388     char next = file_->PeekChar();
389     if (next == '>' && mode_ == ParseMode::EXPR_MODE) {
390         file_->GetChar();
391         token.kind_ = TokenType::RIGHT_SHIFT;
392         token.value_ = ">>";
393         return;
394     }
395 
396     std::string symbol = StringHelper::Format("%c", c);
397     auto iter = symbols_.find(symbol);
398     token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
399     token.value_ = symbol;
400 }
401 
ReadPPlusOp(Token & token)402 void Lexer::ReadPPlusOp(Token &token)
403 {
404     char c = file_->GetChar();
405     char next = file_->PeekChar();
406     if (next == '+') {
407         file_->GetChar();
408         token.kind_ = TokenType::PPLUS;
409         token.value_ = "++";
410         return;
411     }
412 
413     std::string symbol = StringHelper::Format("%c", c);
414     auto iter = symbols_.find(symbol);
415     token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
416     token.value_ = symbol;
417 }
418 
ReadMMinusOp(Token & token)419 void Lexer::ReadMMinusOp(Token &token)
420 {
421     char c = file_->GetChar();
422     char next = file_->PeekChar();
423     if (next == '-') {
424         file_->GetChar();
425         token.kind_ = TokenType::MMINUS;
426         token.value_ = "--";
427         return;
428     }
429 
430     std::string symbol = StringHelper::Format("%c", c);
431     auto iter = symbols_.find(symbol);
432     token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
433     token.value_ = symbol;
434 }
435 
ReadComment(Token & token)436 void Lexer::ReadComment(Token &token)
437 {
438     char c = file_->GetChar();
439     char next = file_->PeekChar();
440     if (next == '/') {
441         ReadLineComment(token);
442         return;
443     } else if (next == '*') {
444         ReadBlockComment(token);
445         return;
446     }
447 
448     std::string symbol = StringHelper::Format("%c", c);
449     auto iter = symbols_.find(symbol);
450     token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
451     token.value_ = symbol;
452 }
453 
ReadLineComment(Token & token)454 void Lexer::ReadLineComment(Token &token)
455 {
456     StringBuilder sb;
457     char c = file_->GetChar();
458     sb.AppendFormat("/%c", c);
459 
460     while (!file_->IsEof()) {
461         c = file_->GetChar();
462         if (c == '\n') {
463             break;
464         }
465         sb.Append(c);
466     }
467 
468     token.kind_ = TokenType::COMMENT_LINE;
469     token.value_ = sb.ToString();
470 }
471 
ReadBlockComment(Token & token)472 void Lexer::ReadBlockComment(Token &token)
473 {
474     StringBuilder sb;
475     char c = file_->GetChar();
476     sb.AppendFormat("/%c", c);
477 
478     while (!file_->IsEof()) {
479         c = file_->GetChar();
480         sb.Append(c);
481 
482         if (c == '*' && file_->PeekChar() == '/') {
483             c = file_->GetChar();
484             sb.Append(c);
485             break;
486         }
487     }
488 
489     token.kind_ = TokenType::COMMENT_BLOCK;
490     token.value_ = sb.ToString();
491 }
492 
ReadSymbolToken(Token & token)493 void Lexer::ReadSymbolToken(Token &token)
494 {
495     char c = file_->GetChar();
496     std::string symbol = StringHelper::Format("%c", c);
497     auto iter = symbols_.find(symbol);
498     token.kind_ = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
499     token.value_ = symbol;
500 }
501 } // namespace HDI
502 } // namespace OHOS