• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "lexer/lexer.h"
17 #include "util/logger.h"
18 #include "util/string_builder.h"
19 #include "util/string_helper.h"
20 
21 namespace OHOS {
22 namespace Idl {
23 Lexer::StrTokenTypeMap Lexer::keyWords_ = {
24     {"void",           TokenType::VOID      },
25     {"boolean",        TokenType::BOOLEAN      },
26     {"byte",           TokenType::BYTE         },
27     {"short",          TokenType::SHORT        },
28     {"int",            TokenType::INT          },
29     {"long",           TokenType::LONG         },
30     {"String",         TokenType::STRING       },
31     {"String16",       TokenType::STRING16     },
32     {"float",          TokenType::FLOAT        },
33     {"double",         TokenType::DOUBLE       },
34     {"FileDescriptor", TokenType::FD           },
35     {"Ashmem",         TokenType::ASHMEM       },
36     {"NativeBuffer",   TokenType::NATIVE_BUFFER},
37     {"Pointer",        TokenType::POINTER      },
38     {"List",           TokenType::LIST         },
39     {"Map",            TokenType::MAP          },
40     {"OrderedMap",     TokenType::ORDEREDMAP   },
41     {"SharedMemQueue", TokenType::SMQ          },
42     {"char",           TokenType::CHAR         },
43     {"unsigned",       TokenType::UNSIGNED     },
44     {"enum",           TokenType::ENUM         },
45     {"struct",         TokenType::STRUCT       },
46     {"union",          TokenType::UNION        },
47     {"package",        TokenType::PACKAGE      },
48     {"interface_token",     TokenType::INTERFACE_TOKEN    },
49     {"support_delegator",   TokenType::SUPPORT_DELEGATOR  },
50     {"option_stub_hooks",  TokenType::OPTION_STUB_HOOKS  },
51     {"option_parcel_hooks",  TokenType::OPTION_PARCEL_HOOKS  },
52     {"sequenceable",   TokenType::SEQ          },
53     {"rawdata",        TokenType::RAWDATA      },
54     {"import",         TokenType::IMPORT       },
55     {"interface",      TokenType::INTERFACE    },
56     {"extends",        TokenType::EXTENDS      },
57     {"oneway",         TokenType::ONEWAY       },
58     {"customMsgOption",  TokenType::CUSTOM_MSG_OPTION  },
59     {"callback",       TokenType::CALLBACK     },
60     {"freezecontrol",  TokenType::FREEZECONTROL},
61     {"full",           TokenType::FULL         },
62     {"lite",           TokenType::LITE         },
63     {"mini",           TokenType::MINI         },
64     {"cacheable",      TokenType::CACHEABLE    },
65     {"ipccode",        TokenType::IPCCODE      },
66     {"ipcincapacity",  TokenType::IPC_IN_CAPACITY  },
67     {"ipcoutcapacity", TokenType::IPC_OUT_CAPACITY },
68     {"macrodef",       TokenType::MACRODEF     },
69     {"macrondef",      TokenType::MACRONDEF    },
70     {"in",             TokenType::IN           },
71     {"out",            TokenType::OUT          },
72     {"inout",          TokenType::INOUT        },
73 };
74 
75 Lexer::StrTokenTypeMap Lexer::symbols_ = {
76     {".",  TokenType::DOT                 },
77     {",",  TokenType::COMMA               },
78     {":",  TokenType::COLON               },
79     {"=",  TokenType::ASSIGN              },
80     {";",  TokenType::SEMICOLON           },
81     {"{",  TokenType::BRACES_LEFT         },
82     {"}",  TokenType::BRACES_RIGHT        },
83     {"[",  TokenType::BRACKETS_LEFT       },
84     {"]",  TokenType::BRACKETS_RIGHT      },
85     {"(",  TokenType::PARENTHESES_LEFT    },
86     {")",  TokenType::PARENTHESES_RIGHT   },
87     {"<",  TokenType::ANGLE_BRACKETS_LEFT },
88     {">",  TokenType::ANGLE_BRACKETS_RIGHT},
89     {"+",  TokenType::ADD                 },
90     {"-",  TokenType::SUB                 },
91     {"*",  TokenType::STAR                },
92     {"/",  TokenType::SLASH               },
93     {"%",  TokenType::PERCENT_SIGN        },
94     {"<<", TokenType::LEFT_SHIFT          },
95     {">>", TokenType::RIGHT_SHIFT         },
96     {"&",  TokenType::AND                 },
97     {"^",  TokenType::XOR                 },
98     {"|",  TokenType::OR                  },
99     {"~",  TokenType::TILDE               },
100     {"++", TokenType::PPLUS               },
101     {"--", TokenType::MMINUS              },
102 };
103 
Lexer()104 Lexer::Lexer() : filePath_(), file_(nullptr), mode_(ParseMode::DECL_MODE), havePeek_(false), curToken_() {}
105 
Reset(const std::string & filePath)106 bool Lexer::Reset(const std::string &filePath)
107 {
108     file_ = std::make_unique<File>(filePath, int(File::READ));
109     if (!file_->IsValid()) {
110         return false;
111     }
112 
113     havePeek_ = false;
114     InitCurToken(curToken_);
115     return true;
116 }
117 
PeekToken(bool skipComment)118 Token Lexer::PeekToken(bool skipComment)
119 {
120     if (!havePeek_) {
121         ReadToken(curToken_, skipComment);
122         havePeek_ = true;
123     }
124     return curToken_;
125 }
126 
GetToken(bool skipComment)127 Token Lexer::GetToken(bool skipComment)
128 {
129     if (!havePeek_) {
130         ReadToken(curToken_, skipComment);
131     }
132     havePeek_ = false;
133     return curToken_;
134 }
135 
SkipCurrentLine()136 void Lexer::SkipCurrentLine()
137 {
138     while (!file_->IsEof()) {
139         char c = file_->GetChar();
140         if (c == '\n') {
141             file_->GetChar();
142             break;
143         }
144     }
145     havePeek_ = false;
146 }
147 
SkipCurrentLine(char untilChar)148 bool Lexer::SkipCurrentLine(char untilChar)
149 {
150     bool ret = true;
151     while (!file_->IsEof()) {
152         int c = file_->GetChar();
153         if (c == untilChar) {
154             ret = true;
155             break;
156         }
157         if (c == '\n') {
158             file_->GetChar();
159             ret = false;
160             break;
161         }
162     }
163     havePeek_ = false;
164     return ret;
165 }
166 
Skip(char untilChar)167 void Lexer::Skip(char untilChar)
168 {
169     while (!file_->IsEof()) {
170         int c = file_->GetChar();
171         if (c == untilChar) {
172             break;
173         }
174     }
175     havePeek_ = false;
176 }
177 
SkipToken(TokenType tokenType)178 void Lexer::SkipToken(TokenType tokenType)
179 {
180     while (curToken_.kind != tokenType && curToken_.kind != TokenType::END_OF_FILE) {
181         GetToken(false);
182     }
183 }
184 
SkipUntilToken(TokenType tokenType)185 void Lexer::SkipUntilToken(TokenType tokenType)
186 {
187     Token token = PeekToken();
188     while (token.kind != tokenType) {
189         GetToken(false);
190         token = PeekToken();
191     }
192 }
193 
SkipEof()194 void Lexer::SkipEof()
195 {
196     while (!file_->IsEof()) {}
197     havePeek_ = false;
198 }
199 
ReadCacheableTime(Token & token)200 bool Lexer::ReadCacheableTime(Token &token)
201 {
202     bool ret = true;
203     StringBuilder sb;
204 
205     while (!file_->IsEof()) {
206         char c = file_->PeekChar();
207         if (isspace(c)) {
208             file_->GetChar();
209             continue;
210         }
211         if (!isdigit(c)) {
212             if (c != ']' && c != ',') {
213                 ret = false;
214             }
215             break;
216         }
217         sb.Append(c);
218         file_->GetChar();
219     }
220 
221     if (ret == false) {
222         return ret;
223     }
224 
225     token.value = sb.ToString();
226     if (token.value.empty()) {
227         return false;
228     }
229 
230     return ret;
231 }
232 
ReadToken(Token & token,bool skipComment)233 void Lexer::ReadToken(Token &token, bool skipComment)
234 {
235     if (!file_->IsEof()) {
236         InitCurToken(token);
237     }
238     while (!file_->IsEof()) {
239         char c = file_->PeekChar();
240         if (isspace(c)) {
241             file_->GetChar();
242             continue;
243         }
244         token.location.row = file_->GetCharLineNumber();
245         token.location.col = file_->GetCharColumnNumber();
246         if (isalpha(c) || c == '_' || (c == '.' && file_->NextChar() == '.')) {
247             ReadId(token);
248             return;
249         } else if (isdigit(c)) {
250             ReadNum(token);
251             return;
252         } else if (c == '<') {
253             ReadShiftLeftOp(token);
254             return;
255         } else if (c == '>') {
256             ReadShiftRightOp(token);
257             return;
258         } else if (c == '+') {
259             ReadPPlusOp(token);
260             return;
261         } else if (c == '-') {
262             ReadMMinusOp(token);
263             return;
264         } else if (c == '/') {
265             ReadComment(token);
266             if ((token.kind == TokenType::COMMENT_BLOCK || token.kind == TokenType::COMMENT_LINE) && skipComment) {
267                 InitCurToken(token);
268                 continue;
269             }
270             return;
271         }
272         ReadSymbolToken(token);
273         return;
274     }
275     token.kind = TokenType::END_OF_FILE;
276     token.value = "";
277 }
278 
InitCurToken(Token & token)279 void Lexer::InitCurToken(Token &token)
280 {
281     token.kind = TokenType::UNKNOWN;
282     token.location.filePath = file_->GetPath();
283     token.location.row = 1;
284     token.location.col = 1;
285     token.value = "";
286 }
287 
ReadId(Token & token)288 void Lexer::ReadId(Token &token)
289 {
290     char c = file_->GetChar();
291     StringBuilder sb;
292     sb.Append(c);
293     while (!file_->IsEof()) {
294         c = file_->PeekChar();
295         if (isalpha(c) || isdigit(c) || c == '_' || c == '.' || c == '/') {
296             c = file_->GetChar();
297             sb.Append(c);
298             continue;
299         }
300         if (isspace(c)) {
301             file_->GetChar();
302         }
303         break;
304     }
305 
306     std::string key = sb.ToString();
307     auto it = keyWords_.find(key);
308     token.kind = (it != keyWords_.end()) ? it->second : TokenType::ID;
309     token.value = sb.ToString();
310 }
311 
ReadNum(Token & token)312 void Lexer::ReadNum(Token &token)
313 {
314     char c = file_->PeekChar();
315     if (c == '0') {
316         file_->GetChar();
317         c = file_->PeekChar();
318         if (c == 'b' || c == 'B') {
319             // binary number
320             ReadBinaryNum(token);
321         } else if (isdigit(c)) {
322             // octal number
323             return ReadOctNum(token);
324         } else if (c == 'X' || c == 'x') {
325             // hexadecimal number
326             return ReadHexNum(token);
327         } else {
328             // decimal number 0
329             token.kind = TokenType::NUM;
330             token.value = "0";
331         }
332     } else {
333         ReadDecNum(token);
334     }
335     ReadNumSuffix(token);
336 }
337 
ReadBinaryNum(Token & token)338 void Lexer::ReadBinaryNum(Token &token)
339 {
340     StringBuilder sb;
341     char c = file_->GetChar(); // read 'b' or 'B'
342     sb.AppendFormat("0%c", c);
343     bool err = true;
344 
345     while (!file_->IsEof()) {
346         c = file_->PeekChar();
347         if (c == '0' || c == '1') {
348             sb.Append(c);
349             file_->GetChar();
350             err = false;
351         } else {
352             break;
353         }
354     }
355 
356     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
357     token.value = sb.ToString();
358 }
359 
ReadOctNum(Token & token)360 void Lexer::ReadOctNum(Token &token)
361 {
362     StringBuilder sb;
363     sb.Append("0");
364     bool err = false;
365 
366     while (!file_->IsEof()) {
367         char c = file_->PeekChar();
368         if (!isdigit(c)) {
369             break;
370         }
371 
372         if (!(c >= '0' && c <= '7')) {
373             err = true;
374         }
375         sb.Append(c);
376         file_->GetChar();
377     }
378 
379     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
380     token.value = sb.ToString();
381 }
382 
ReadHexNum(Token & token)383 void Lexer::ReadHexNum(Token &token)
384 {
385     StringBuilder sb;
386     char c = file_->GetChar(); // read 'x' or 'X'
387     sb.AppendFormat("0%c", c);
388     bool err = true;
389 
390     while (!file_->IsEof()) {
391         c = file_->PeekChar();
392         if (isdigit(c) || ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
393             sb.Append(c);
394             file_->GetChar();
395             err = false;
396         } else {
397             break;
398         }
399     }
400 
401     token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
402     token.value = sb.ToString();
403 }
404 
ReadDecNum(Token & token)405 void Lexer::ReadDecNum(Token &token)
406 {
407     StringBuilder sb;
408     char c = file_->GetChar();
409     sb.Append(c);
410 
411     while (!file_->IsEof()) {
412         c = file_->PeekChar();
413         if (!isdigit(c)) {
414             break;
415         }
416 
417         sb.Append(c);
418         file_->GetChar();
419     }
420 
421     token.kind = TokenType::NUM;
422     token.value = sb.ToString();
423 }
424 
ReadNumSuffix(Token & token)425 void Lexer::ReadNumSuffix(Token &token)
426 {
427     while (!file_->IsEof()) {
428         char c = file_->PeekChar();
429         if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
430             token.value += c;
431             file_->GetChar();
432         } else {
433             break;
434         }
435     }
436 }
437 
ReadShiftLeftOp(Token & token)438 void Lexer::ReadShiftLeftOp(Token &token)
439 {
440     char c = file_->GetChar();
441     char next = file_->PeekChar();
442     if (next == '<') {
443         file_->GetChar();
444         token.kind = TokenType::LEFT_SHIFT;
445         token.value = "<<";
446         return;
447     }
448 
449     std::string symbol = StringHelper::Format("%c", c);
450     auto iter = symbols_.find(symbol);
451     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
452     token.value = symbol;
453 }
454 
ReadShiftRightOp(Token & token)455 void Lexer::ReadShiftRightOp(Token &token)
456 {
457     char c = file_->GetChar();
458     char next = file_->PeekChar();
459     if (next == '>' && mode_ == ParseMode::EXPR_MODE) {
460         file_->GetChar();
461         token.kind = TokenType::RIGHT_SHIFT;
462         token.value = ">>";
463         return;
464     }
465 
466     std::string symbol = StringHelper::Format("%c", c);
467     auto iter = symbols_.find(symbol);
468     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
469     token.value = symbol;
470 }
471 
ReadPPlusOp(Token & token)472 void Lexer::ReadPPlusOp(Token &token)
473 {
474     char c = file_->GetChar();
475     char next = file_->PeekChar();
476     if (next == '+') {
477         file_->GetChar();
478         token.kind = TokenType::PPLUS;
479         token.value = "++";
480         return;
481     }
482 
483     std::string symbol = StringHelper::Format("%c", c);
484     auto iter = symbols_.find(symbol);
485     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
486     token.value = symbol;
487 }
488 
ReadMMinusOp(Token & token)489 void Lexer::ReadMMinusOp(Token &token)
490 {
491     char c = file_->GetChar();
492     char next = file_->PeekChar();
493     if (next == '-') {
494         file_->GetChar();
495         token.kind = TokenType::MMINUS;
496         token.value = "--";
497         return;
498     }
499 
500     std::string symbol = StringHelper::Format("%c", c);
501     auto iter = symbols_.find(symbol);
502     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
503     token.value = symbol;
504 }
505 
ReadComment(Token & token)506 void Lexer::ReadComment(Token &token)
507 {
508     char c = file_->GetChar();
509     char next = file_->PeekChar();
510     if (next == '/') {
511         ReadLineComment(token);
512         return;
513     } else if (next == '*') {
514         ReadBlockComment(token);
515         return;
516     }
517 
518     std::string symbol = StringHelper::Format("%c", c);
519     auto iter = symbols_.find(symbol);
520     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
521     token.value = symbol;
522 }
523 
ReadLineComment(Token & token)524 void Lexer::ReadLineComment(Token &token)
525 {
526     StringBuilder sb;
527     char c = file_->GetChar();
528     sb.AppendFormat("/%c", c);
529 
530     while (!file_->IsEof()) {
531         c = file_->GetChar();
532         if (c == '\n') {
533             break;
534         }
535         sb.Append(c);
536     }
537 
538     token.kind = TokenType::COMMENT_LINE;
539     token.value = sb.ToString();
540 }
541 
ReadBlockComment(Token & token)542 void Lexer::ReadBlockComment(Token &token)
543 {
544     StringBuilder sb;
545     char c = file_->GetChar();
546     sb.AppendFormat("/%c", c);
547 
548     while (!file_->IsEof()) {
549         c = file_->GetChar();
550         sb.Append(c);
551 
552         if (c == '*' && file_->PeekChar() == '/') {
553             c = file_->GetChar();
554             sb.Append(c);
555             break;
556         }
557     }
558 
559     token.kind = TokenType::COMMENT_BLOCK;
560     token.value = sb.ToString();
561 }
562 
ReadSymbolToken(Token & token)563 void Lexer::ReadSymbolToken(Token &token)
564 {
565     char c = file_->GetChar();
566     std::string symbol = StringHelper::Format("%c", c);
567     auto iter = symbols_.find(symbol);
568     token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
569     token.value = symbol;
570 }
571 } // namespace Idl
572 } // namespace OHOS