• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "lexer/lexer.h"
17 #include "util/logger.h"
18 #include "util/string_builder.h"
19 #include "util/string_helper.h"
20 
21 namespace OHOS {
22 namespace Idl {
23 Lexer::StrTokenTypeMap Lexer::keyWords_ = {
24     {"void",           TokenType::VOID      },
25     {"boolean",        TokenType::BOOLEAN      },
26     {"byte",           TokenType::BYTE         },
27     {"short",          TokenType::SHORT        },
28     {"int",            TokenType::INT          },
29     {"long",           TokenType::LONG         },
30     {"CString",        TokenType::CSTRING      },
31     {"String",         TokenType::STRING       },
32     {"String16",       TokenType::STRING16     },
33     {"U16string",      TokenType::U16STRING    },
34     {"float",          TokenType::FLOAT        },
35     {"double",         TokenType::DOUBLE       },
36     {"FileDescriptor", TokenType::FD           },
37     {"FileDescriptorSan", TokenType::FDSAN     },
38     {"Ashmem",         TokenType::ASHMEM       },
39     {"NativeBuffer",   TokenType::NATIVE_BUFFER},
40     {"Pointer",        TokenType::POINTER      },
41     {"List",           TokenType::LIST         },
42     {"Set",            TokenType::SET          },
43     {"Map",            TokenType::MAP          },
44     {"OrderedMap",     TokenType::ORDEREDMAP   },
45     {"SharedMemQueue", TokenType::SMQ          },
46     {"char",           TokenType::CHAR         },
47     {"unsigned",       TokenType::UNSIGNED     },
48     {"enum",           TokenType::ENUM         },
49     {"sharedptr",      TokenType::SHAREDPTR    },
50     {"uniqueptr",      TokenType::UNIQUEPTR    },
51     {"sptr",           TokenType::SPTR         },
52     {"null_sharedptr",   TokenType::NULL_SHAREDPTR  },
53     {"null_uniqueptr",   TokenType::NULL_UNIQUEPTR  },
54     {"null_sptr",        TokenType::NULL_SPTR       },
55     {"struct",         TokenType::STRUCT       },
56     {"union",          TokenType::UNION        },
57     {"package",        TokenType::PACKAGE      },
58     {"interface_token",     TokenType::INTERFACE_TOKEN    },
59     {"support_delegator",   TokenType::SUPPORT_DELEGATOR  },
60     {"option_stub_hooks",  TokenType::OPTION_STUB_HOOKS  },
61     {"option_parcel_hooks",  TokenType::OPTION_PARCEL_HOOKS  },
62     {"sequenceable",   TokenType::SEQ          },
63     {"rawdata",        TokenType::RAWDATA      },
64     {"import",         TokenType::IMPORT       },
65     {"interface",      TokenType::INTERFACE    },
66     {"extends",        TokenType::EXTENDS      },
67     {"oneway",         TokenType::ONEWAY       },
68     {"customMsgOption",  TokenType::CUSTOM_MSG_OPTION  },
69     {"callback",       TokenType::CALLBACK     },
70     {"freezecontrol",  TokenType::FREEZECONTROL},
71     {"full",           TokenType::FULL         },
72     {"lite",           TokenType::LITE         },
73     {"mini",           TokenType::MINI         },
74     {"cacheable",      TokenType::CACHEABLE    },
75     {"ipccode",        TokenType::IPCCODE      },
76     {"ipcincapacity",  TokenType::IPC_IN_CAPACITY  },
77     {"ipcoutcapacity", TokenType::IPC_OUT_CAPACITY },
78     {"macrodef",       TokenType::MACRODEF     },
79     {"macrondef",      TokenType::MACRONDEF    },
80     {"in",             TokenType::IN           },
81     {"out",            TokenType::OUT          },
82     {"inout",          TokenType::INOUT        },
83 };
84 
85 Lexer::StrTokenTypeMap Lexer::symbols_ = {
86     {".",  TokenType::DOT                 },
87     {",",  TokenType::COMMA               },
88     {":",  TokenType::COLON               },
89     {"=",  TokenType::ASSIGN              },
90     {";",  TokenType::SEMICOLON           },
91     {"{",  TokenType::BRACES_LEFT         },
92     {"}",  TokenType::BRACES_RIGHT        },
93     {"[",  TokenType::BRACKETS_LEFT       },
94     {"]",  TokenType::BRACKETS_RIGHT      },
95     {"(",  TokenType::PARENTHESES_LEFT    },
96     {")",  TokenType::PARENTHESES_RIGHT   },
97     {"<",  TokenType::ANGLE_BRACKETS_LEFT },
98     {">",  TokenType::ANGLE_BRACKETS_RIGHT},
99     {"+",  TokenType::ADD                 },
100     {"-",  TokenType::SUB                 },
101     {"*",  TokenType::STAR                },
102     {"/",  TokenType::SLASH               },
103     {"%",  TokenType::PERCENT_SIGN        },
104     {"<<", TokenType::LEFT_SHIFT          },
105     {">>", TokenType::RIGHT_SHIFT         },
106     {"&",  TokenType::AND                 },
107     {"^",  TokenType::XOR                 },
108     {"|",  TokenType::OR                  },
109     {"~",  TokenType::TILDE               },
110     {"++", TokenType::PPLUS               },
111     {"--", TokenType::MMINUS              },
112 };
113 
Lexer()114 Lexer::Lexer() : filePath_(), file_(nullptr), mode_(ParseMode::DECL_MODE), havePeek_(false), curToken_() {}
115 
Reset(const std::string & filePath)116 bool Lexer::Reset(const std::string &filePath)
117 {
118     file_ = std::make_unique<File>(filePath, int(File::READ));
119     if (!file_->IsValid()) {
120         return false;
121     }
122 
123     havePeek_ = false;
124     InitCurToken(curToken_);
125     return true;
126 }
127 
PeekToken(bool skipComment)128 Token Lexer::PeekToken(bool skipComment)
129 {
130     if (!havePeek_) {
131         ReadToken(curToken_, skipComment);
132         havePeek_ = true;
133     }
134     return curToken_;
135 }
136 
GetToken(bool skipComment)137 Token Lexer::GetToken(bool skipComment)
138 {
139     if (!havePeek_) {
140         ReadToken(curToken_, skipComment);
141     }
142     havePeek_ = false;
143     return curToken_;
144 }
145 
SkipCurrentLine()146 void Lexer::SkipCurrentLine()
147 {
148     while (!file_->IsEof()) {
149         char c = file_->GetChar();
150         if (c == '\n') {
151             file_->GetChar();
152             break;
153         }
154     }
155     havePeek_ = false;
156 }
157 
SkipCurrentLine(char untilChar)158 bool Lexer::SkipCurrentLine(char untilChar)
159 {
160     bool ret = true;
161     while (!file_->IsEof()) {
162         int c = file_->GetChar();
163         if (c == untilChar) {
164             ret = true;
165             break;
166         }
167         if (c == '\n') {
168             file_->GetChar();
169             ret = false;
170             break;
171         }
172     }
173     havePeek_ = false;
174     return ret;
175 }
176 
Skip(char untilChar)177 void Lexer::Skip(char untilChar)
178 {
179     while (!file_->IsEof()) {
180         int c = file_->GetChar();
181         if (c == untilChar) {
182             break;
183         }
184     }
185     havePeek_ = false;
186 }
187 
SkipToken(TokenType tokenType)188 void Lexer::SkipToken(TokenType tokenType)
189 {
190     while (curToken_.kind != tokenType && curToken_.kind != TokenType::END_OF_FILE) {
191         GetToken(false);
192     }
193 }
194 
SkipUntilToken(TokenType tokenType)195 void Lexer::SkipUntilToken(TokenType tokenType)
196 {
197     Token token = PeekToken();
198     while (token.kind != tokenType) {
199         GetToken(false);
200         token = PeekToken();
201     }
202 }
203 
SkipEof()204 void Lexer::SkipEof()
205 {
206     while (!file_->IsEof()) {}
207     havePeek_ = false;
208 }
209 
ReadCacheableTime(Token & token)210 bool Lexer::ReadCacheableTime(Token &token)
211 {
212     bool ret = true;
213     StringBuilder sb;
214 
215     while (!file_->IsEof()) {
216         char c = file_->PeekChar();
217         if (isspace(c)) {
218             file_->GetChar();
219             continue;
220         }
221         if (!isdigit(c)) {
222             if (c != ']' && c != ',') {
223                 ret = false;
224             }
225             break;
226         }
227         sb.Append(c);
228         file_->GetChar();
229     }
230 
231     if (ret == false) {
232         return ret;
233     }
234 
235     token.value = sb.ToString();
236     if (token.value.empty()) {
237         return false;
238     }
239 
240     return ret;
241 }
242 
ReadToken(Token & token,bool skipComment)243 void Lexer::ReadToken(Token &token, bool skipComment)
244 {
245     if (!file_->IsEof()) {
246         InitCurToken(token);
247     }
248     while (!file_->IsEof()) {
249         char c = file_->PeekChar();
250         if (isspace(c)) {
251             file_->GetChar();
252             continue;
253         }
254         token.location.row = file_->GetCharLineNumber();
255         token.location.col = file_->GetCharColumnNumber();
256         if (isalpha(c) || c == '_' || (c == '.' && file_->NextChar() == '.')) {
257             ReadId(token);
258             return;
259         } else if (isdigit(c)) {
260             ReadNum(token);
261             return;
262         } else if (c == '<') {
263             ReadShiftLeftOp(token);
264             return;
265         } else if (c == '>') {
266             ReadShiftRightOp(token);
267             return;
268         } else if (c == '+') {
269             ReadPPlusOp(token);
270             return;
271         } else if (c == '-') {
272             ReadMMinusOp(token);
273             return;
274         } else if (c == '/') {
275             ReadComment(token);
276             if ((token.kind == TokenType::COMMENT_BLOCK || token.kind == TokenType::COMMENT_LINE) && skipComment) {
277                 InitCurToken(token);
278                 continue;
279             }
280             return;
281         }
282         ReadSymbolToken(token);
283         return;
284     }
285     token.kind = TokenType::END_OF_FILE;
286     token.value = "";
287 }
288 
InitCurToken(Token & token)289 void Lexer::InitCurToken(Token &token)
290 {
291     token.kind = TokenType::UNKNOWN;
292     token.location.filePath = file_->GetPath();
293     token.location.row = 1;
294     token.location.col = 1;
295     token.value = "";
296 }
297 
ReadId(Token & token)298 void Lexer::ReadId(Token &token)
299 {
300     char c = file_->GetChar();
301     StringBuilder sb;
302     sb.Append(c);
303     while (!file_->IsEof()) {
304         c = file_->PeekChar();
305         if (isalpha(c) || isdigit(c) || c == '_' || c == '.' || c == '/') {
306             c = file_->GetChar();
307             sb.Append(c);
308             continue;
309         }
310         if (isspace(c)) {
311             file_->GetChar();
312         }
313         break;
314     }
315 
316     std::string key = sb.ToString();
317     auto it = keyWords_.find(key);
318     token.kind = (it != keyWords_.end()) ? it->second : TokenType::ID;
319     token.value = sb.ToString();
320 }
321 
ReadNum(Token & token)322 void Lexer::ReadNum(Token &token)
323 {
324     char c = file_->PeekChar();
325     if (c == '0') {
326         file_->GetChar();
327         c = file_->PeekChar();
328         if (c == 'b' || c == 'B') {
329             // binary number
330             ReadBinaryNum(token);
331         } else if (isdigit(c)) {
332             // octal number
333             return ReadOctNum(token);
334         } else if (c == 'X' || c == 'x') {
335             // hexadecimal number
336             return ReadHexNum(token);
337         } else {
338             // decimal number 0
339             token.kind = TokenType::NUM;
340             token.value = "0";
341         }
342     } else {
343         ReadDecNum(token);
344     }
345     ReadNumSuffix(token);
346 }
347 
ReadBinaryNum(Token & token)348 void Lexer::ReadBinaryNum(Token &token)
349 {
350     StringBuilder sb;
351     char c = file_->GetChar(); // read 'b' or 'B'
352     sb.AppendFormat("0%c", c);
353     bool err = true;
354 
355     while (!file_->IsEof()) {
356         c = file_->PeekChar();
357         if (c == '0' || c == '1') {
358             sb.Append(c);
359             file_->GetChar();
360             err = false;
361         } else {
362             break;
363         }
364     }
365 
366     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
367     token.value = sb.ToString();
368 }
369 
ReadOctNum(Token & token)370 void Lexer::ReadOctNum(Token &token)
371 {
372     StringBuilder sb;
373     sb.Append("0");
374     bool err = false;
375 
376     while (!file_->IsEof()) {
377         char c = file_->PeekChar();
378         if (!isdigit(c)) {
379             break;
380         }
381 
382         if (!(c >= '0' && c <= '7')) {
383             err = true;
384         }
385         sb.Append(c);
386         file_->GetChar();
387     }
388 
389     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
390     token.value = sb.ToString();
391 }
392 
ReadHexNum(Token & token)393 void Lexer::ReadHexNum(Token &token)
394 {
395     StringBuilder sb;
396     char c = file_->GetChar(); // read 'x' or 'X'
397     sb.AppendFormat("0%c", c);
398     bool err = true;
399 
400     while (!file_->IsEof()) {
401         c = file_->PeekChar();
402         if (isdigit(c) || ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
403             sb.Append(c);
404             file_->GetChar();
405             err = false;
406         } else {
407             break;
408         }
409     }
410 
411     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
412     token.value = sb.ToString();
413 }
414 
ReadDecNum(Token & token)415 void Lexer::ReadDecNum(Token &token)
416 {
417     StringBuilder sb;
418     char c = file_->GetChar();
419     sb.Append(c);
420 
421     while (!file_->IsEof()) {
422         c = file_->PeekChar();
423         if (!isdigit(c)) {
424             break;
425         }
426 
427         sb.Append(c);
428         file_->GetChar();
429     }
430 
431     token.kind = TokenType::NUM;
432     token.value = sb.ToString();
433 }
434 
ReadNumSuffix(Token & token)435 void Lexer::ReadNumSuffix(Token &token)
436 {
437     while (!file_->IsEof()) {
438         char c = file_->PeekChar();
439         if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
440             token.value += c;
441             file_->GetChar();
442         } else {
443             break;
444         }
445     }
446 }
447 
ReadShiftLeftOp(Token & token)448 void Lexer::ReadShiftLeftOp(Token &token)
449 {
450     char c = file_->GetChar();
451     char next = file_->PeekChar();
452     if (next == '<') {
453         file_->GetChar();
454         token.kind = TokenType::LEFT_SHIFT;
455         token.value = "<<";
456         return;
457     }
458 
459     std::string symbol = StringHelper::Format("%c", c);
460     auto iter = symbols_.find(symbol);
461     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
462     token.value = symbol;
463 }
464 
ReadShiftRightOp(Token & token)465 void Lexer::ReadShiftRightOp(Token &token)
466 {
467     char c = file_->GetChar();
468     char next = file_->PeekChar();
469     if (next == '>' && mode_ == ParseMode::EXPR_MODE) {
470         file_->GetChar();
471         token.kind = TokenType::RIGHT_SHIFT;
472         token.value = ">>";
473         return;
474     }
475 
476     std::string symbol = StringHelper::Format("%c", c);
477     auto iter = symbols_.find(symbol);
478     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
479     token.value = symbol;
480 }
481 
ReadPPlusOp(Token & token)482 void Lexer::ReadPPlusOp(Token &token)
483 {
484     char c = file_->GetChar();
485     char next = file_->PeekChar();
486     if (next == '+') {
487         file_->GetChar();
488         token.kind = TokenType::PPLUS;
489         token.value = "++";
490         return;
491     }
492 
493     std::string symbol = StringHelper::Format("%c", c);
494     auto iter = symbols_.find(symbol);
495     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
496     token.value = symbol;
497 }
498 
ReadMMinusOp(Token & token)499 void Lexer::ReadMMinusOp(Token &token)
500 {
501     char c = file_->GetChar();
502     char next = file_->PeekChar();
503     if (next == '-') {
504         file_->GetChar();
505         token.kind = TokenType::MMINUS;
506         token.value = "--";
507         return;
508     }
509 
510     std::string symbol = StringHelper::Format("%c", c);
511     auto iter = symbols_.find(symbol);
512     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
513     token.value = symbol;
514 }
515 
ReadComment(Token & token)516 void Lexer::ReadComment(Token &token)
517 {
518     char c = file_->GetChar();
519     char next = file_->PeekChar();
520     if (next == '/') {
521         ReadLineComment(token);
522         return;
523     } else if (next == '*') {
524         ReadBlockComment(token);
525         return;
526     }
527 
528     std::string symbol = StringHelper::Format("%c", c);
529     auto iter = symbols_.find(symbol);
530     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
531     token.value = symbol;
532 }
533 
ReadLineComment(Token & token)534 void Lexer::ReadLineComment(Token &token)
535 {
536     StringBuilder sb;
537     char c = file_->GetChar();
538     sb.AppendFormat("/%c", c);
539 
540     while (!file_->IsEof()) {
541         c = file_->GetChar();
542         if (c == '\n') {
543             break;
544         }
545         sb.Append(c);
546     }
547 
548     token.kind = TokenType::COMMENT_LINE;
549     token.value = sb.ToString();
550 }
551 
ReadBlockComment(Token & token)552 void Lexer::ReadBlockComment(Token &token)
553 {
554     StringBuilder sb;
555     char c = file_->GetChar();
556     sb.AppendFormat("/%c", c);
557 
558     while (!file_->IsEof()) {
559         c = file_->GetChar();
560         sb.Append(c);
561 
562         if (c == '*' && file_->PeekChar() == '/') {
563             c = file_->GetChar();
564             sb.Append(c);
565             break;
566         }
567     }
568 
569     token.kind = TokenType::COMMENT_BLOCK;
570     token.value = sb.ToString();
571 }
572 
ReadSymbolToken(Token & token)573 void Lexer::ReadSymbolToken(Token &token)
574 {
575     char c = file_->GetChar();
576     std::string symbol = StringHelper::Format("%c", c);
577     auto iter = symbols_.find(symbol);
578     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
579     token.value = symbol;
580 }
581 } // namespace Idl
582 } // namespace OHOS