1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "lexer/lexer.h"
17 #include "util/logger.h"
18 #include "util/string_builder.h"
19 #include "util/string_helper.h"
20
21 namespace OHOS {
22 namespace Idl {
23 Lexer::StrTokenTypeMap Lexer::keyWords_ = {
24 {"void", TokenType::VOID },
25 {"boolean", TokenType::BOOLEAN },
26 {"byte", TokenType::BYTE },
27 {"short", TokenType::SHORT },
28 {"int", TokenType::INT },
29 {"long", TokenType::LONG },
30 {"CString", TokenType::CSTRING },
31 {"String", TokenType::STRING },
32 {"String16", TokenType::STRING16 },
33 {"U16string", TokenType::U16STRING },
34 {"float", TokenType::FLOAT },
35 {"double", TokenType::DOUBLE },
36 {"FileDescriptor", TokenType::FD },
37 {"FileDescriptorSan", TokenType::FDSAN },
38 {"Ashmem", TokenType::ASHMEM },
39 {"NativeBuffer", TokenType::NATIVE_BUFFER},
40 {"Pointer", TokenType::POINTER },
41 {"List", TokenType::LIST },
42 {"Set", TokenType::SET },
43 {"Map", TokenType::MAP },
44 {"OrderedMap", TokenType::ORDEREDMAP },
45 {"SharedMemQueue", TokenType::SMQ },
46 {"char", TokenType::CHAR },
47 {"unsigned", TokenType::UNSIGNED },
48 {"enum", TokenType::ENUM },
49 {"sharedptr", TokenType::SHAREDPTR },
50 {"uniqueptr", TokenType::UNIQUEPTR },
51 {"sptr", TokenType::SPTR },
52 {"null_sharedptr", TokenType::NULL_SHAREDPTR },
53 {"null_uniqueptr", TokenType::NULL_UNIQUEPTR },
54 {"null_sptr", TokenType::NULL_SPTR },
55 {"struct", TokenType::STRUCT },
56 {"union", TokenType::UNION },
57 {"package", TokenType::PACKAGE },
58 {"interface_token", TokenType::INTERFACE_TOKEN },
59 {"support_delegator", TokenType::SUPPORT_DELEGATOR },
60 {"option_stub_hooks", TokenType::OPTION_STUB_HOOKS },
61 {"option_parcel_hooks", TokenType::OPTION_PARCEL_HOOKS },
62 {"sequenceable", TokenType::SEQ },
63 {"rawdata", TokenType::RAWDATA },
64 {"import", TokenType::IMPORT },
65 {"interface", TokenType::INTERFACE },
66 {"extends", TokenType::EXTENDS },
67 {"oneway", TokenType::ONEWAY },
68 {"customMsgOption", TokenType::CUSTOM_MSG_OPTION },
69 {"callback", TokenType::CALLBACK },
70 {"freezecontrol", TokenType::FREEZECONTROL},
71 {"full", TokenType::FULL },
72 {"lite", TokenType::LITE },
73 {"mini", TokenType::MINI },
74 {"cacheable", TokenType::CACHEABLE },
75 {"ipccode", TokenType::IPCCODE },
76 {"ipcincapacity", TokenType::IPC_IN_CAPACITY },
77 {"ipcoutcapacity", TokenType::IPC_OUT_CAPACITY },
78 {"macrodef", TokenType::MACRODEF },
79 {"macrondef", TokenType::MACRONDEF },
80 {"in", TokenType::IN },
81 {"out", TokenType::OUT },
82 {"inout", TokenType::INOUT },
83 };
84
85 Lexer::StrTokenTypeMap Lexer::symbols_ = {
86 {".", TokenType::DOT },
87 {",", TokenType::COMMA },
88 {":", TokenType::COLON },
89 {"=", TokenType::ASSIGN },
90 {";", TokenType::SEMICOLON },
91 {"{", TokenType::BRACES_LEFT },
92 {"}", TokenType::BRACES_RIGHT },
93 {"[", TokenType::BRACKETS_LEFT },
94 {"]", TokenType::BRACKETS_RIGHT },
95 {"(", TokenType::PARENTHESES_LEFT },
96 {")", TokenType::PARENTHESES_RIGHT },
97 {"<", TokenType::ANGLE_BRACKETS_LEFT },
98 {">", TokenType::ANGLE_BRACKETS_RIGHT},
99 {"+", TokenType::ADD },
100 {"-", TokenType::SUB },
101 {"*", TokenType::STAR },
102 {"/", TokenType::SLASH },
103 {"%", TokenType::PERCENT_SIGN },
104 {"<<", TokenType::LEFT_SHIFT },
105 {">>", TokenType::RIGHT_SHIFT },
106 {"&", TokenType::AND },
107 {"^", TokenType::XOR },
108 {"|", TokenType::OR },
109 {"~", TokenType::TILDE },
110 {"++", TokenType::PPLUS },
111 {"--", TokenType::MMINUS },
112 };
113
Lexer()114 Lexer::Lexer() : filePath_(), file_(nullptr), mode_(ParseMode::DECL_MODE), havePeek_(false), curToken_() {}
115
Reset(const std::string & filePath)116 bool Lexer::Reset(const std::string &filePath)
117 {
118 file_ = std::make_unique<File>(filePath, int(File::READ));
119 if (!file_->IsValid()) {
120 return false;
121 }
122
123 havePeek_ = false;
124 InitCurToken(curToken_);
125 return true;
126 }
127
PeekToken(bool skipComment)128 Token Lexer::PeekToken(bool skipComment)
129 {
130 if (!havePeek_) {
131 ReadToken(curToken_, skipComment);
132 havePeek_ = true;
133 }
134 return curToken_;
135 }
136
GetToken(bool skipComment)137 Token Lexer::GetToken(bool skipComment)
138 {
139 if (!havePeek_) {
140 ReadToken(curToken_, skipComment);
141 }
142 havePeek_ = false;
143 return curToken_;
144 }
145
SkipCurrentLine()146 void Lexer::SkipCurrentLine()
147 {
148 while (!file_->IsEof()) {
149 char c = file_->GetChar();
150 if (c == '\n') {
151 file_->GetChar();
152 break;
153 }
154 }
155 havePeek_ = false;
156 }
157
SkipCurrentLine(char untilChar)158 bool Lexer::SkipCurrentLine(char untilChar)
159 {
160 bool ret = true;
161 while (!file_->IsEof()) {
162 int c = file_->GetChar();
163 if (c == untilChar) {
164 ret = true;
165 break;
166 }
167 if (c == '\n') {
168 file_->GetChar();
169 ret = false;
170 break;
171 }
172 }
173 havePeek_ = false;
174 return ret;
175 }
176
Skip(char untilChar)177 void Lexer::Skip(char untilChar)
178 {
179 while (!file_->IsEof()) {
180 int c = file_->GetChar();
181 if (c == untilChar) {
182 break;
183 }
184 }
185 havePeek_ = false;
186 }
187
SkipToken(TokenType tokenType)188 void Lexer::SkipToken(TokenType tokenType)
189 {
190 while (curToken_.kind != tokenType && curToken_.kind != TokenType::END_OF_FILE) {
191 GetToken(false);
192 }
193 }
194
SkipUntilToken(TokenType tokenType)195 void Lexer::SkipUntilToken(TokenType tokenType)
196 {
197 Token token = PeekToken();
198 while (token.kind != tokenType) {
199 GetToken(false);
200 token = PeekToken();
201 }
202 }
203
SkipEof()204 void Lexer::SkipEof()
205 {
206 while (!file_->IsEof()) {}
207 havePeek_ = false;
208 }
209
ReadCacheableTime(Token & token)210 bool Lexer::ReadCacheableTime(Token &token)
211 {
212 bool ret = true;
213 StringBuilder sb;
214
215 while (!file_->IsEof()) {
216 char c = file_->PeekChar();
217 if (isspace(c)) {
218 file_->GetChar();
219 continue;
220 }
221 if (!isdigit(c)) {
222 if (c != ']' && c != ',') {
223 ret = false;
224 }
225 break;
226 }
227 sb.Append(c);
228 file_->GetChar();
229 }
230
231 if (ret == false) {
232 return ret;
233 }
234
235 token.value = sb.ToString();
236 if (token.value.empty()) {
237 return false;
238 }
239
240 return ret;
241 }
242
ReadToken(Token & token,bool skipComment)243 void Lexer::ReadToken(Token &token, bool skipComment)
244 {
245 if (!file_->IsEof()) {
246 InitCurToken(token);
247 }
248 while (!file_->IsEof()) {
249 char c = file_->PeekChar();
250 if (isspace(c)) {
251 file_->GetChar();
252 continue;
253 }
254 token.location.row = file_->GetCharLineNumber();
255 token.location.col = file_->GetCharColumnNumber();
256 if (isalpha(c) || c == '_' || (c == '.' && file_->NextChar() == '.')) {
257 ReadId(token);
258 return;
259 } else if (isdigit(c)) {
260 ReadNum(token);
261 return;
262 } else if (c == '<') {
263 ReadShiftLeftOp(token);
264 return;
265 } else if (c == '>') {
266 ReadShiftRightOp(token);
267 return;
268 } else if (c == '+') {
269 ReadPPlusOp(token);
270 return;
271 } else if (c == '-') {
272 ReadMMinusOp(token);
273 return;
274 } else if (c == '/') {
275 ReadComment(token);
276 if ((token.kind == TokenType::COMMENT_BLOCK || token.kind == TokenType::COMMENT_LINE) && skipComment) {
277 InitCurToken(token);
278 continue;
279 }
280 return;
281 }
282 ReadSymbolToken(token);
283 return;
284 }
285 token.kind = TokenType::END_OF_FILE;
286 token.value = "";
287 }
288
InitCurToken(Token & token)289 void Lexer::InitCurToken(Token &token)
290 {
291 token.kind = TokenType::UNKNOWN;
292 token.location.filePath = file_->GetPath();
293 token.location.row = 1;
294 token.location.col = 1;
295 token.value = "";
296 }
297
ReadId(Token & token)298 void Lexer::ReadId(Token &token)
299 {
300 char c = file_->GetChar();
301 StringBuilder sb;
302 sb.Append(c);
303 while (!file_->IsEof()) {
304 c = file_->PeekChar();
305 if (isalpha(c) || isdigit(c) || c == '_' || c == '.' || c == '/') {
306 c = file_->GetChar();
307 sb.Append(c);
308 continue;
309 }
310 if (isspace(c)) {
311 file_->GetChar();
312 }
313 break;
314 }
315
316 std::string key = sb.ToString();
317 auto it = keyWords_.find(key);
318 token.kind = (it != keyWords_.end()) ? it->second : TokenType::ID;
319 token.value = sb.ToString();
320 }
321
ReadNum(Token & token)322 void Lexer::ReadNum(Token &token)
323 {
324 char c = file_->PeekChar();
325 if (c == '0') {
326 file_->GetChar();
327 c = file_->PeekChar();
328 if (c == 'b' || c == 'B') {
329 // binary number
330 ReadBinaryNum(token);
331 } else if (isdigit(c)) {
332 // octal number
333 return ReadOctNum(token);
334 } else if (c == 'X' || c == 'x') {
335 // hexadecimal number
336 return ReadHexNum(token);
337 } else {
338 // decimal number 0
339 token.kind = TokenType::NUM;
340 token.value = "0";
341 }
342 } else {
343 ReadDecNum(token);
344 }
345 ReadNumSuffix(token);
346 }
347
ReadBinaryNum(Token & token)348 void Lexer::ReadBinaryNum(Token &token)
349 {
350 StringBuilder sb;
351 char c = file_->GetChar(); // read 'b' or 'B'
352 sb.AppendFormat("0%c", c);
353 bool err = true;
354
355 while (!file_->IsEof()) {
356 c = file_->PeekChar();
357 if (c == '0' || c == '1') {
358 sb.Append(c);
359 file_->GetChar();
360 err = false;
361 } else {
362 break;
363 }
364 }
365
366 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
367 token.value = sb.ToString();
368 }
369
ReadOctNum(Token & token)370 void Lexer::ReadOctNum(Token &token)
371 {
372 StringBuilder sb;
373 sb.Append("0");
374 bool err = false;
375
376 while (!file_->IsEof()) {
377 char c = file_->PeekChar();
378 if (!isdigit(c)) {
379 break;
380 }
381
382 if (!(c >= '0' && c <= '7')) {
383 err = true;
384 }
385 sb.Append(c);
386 file_->GetChar();
387 }
388
389 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
390 token.value = sb.ToString();
391 }
392
ReadHexNum(Token & token)393 void Lexer::ReadHexNum(Token &token)
394 {
395 StringBuilder sb;
396 char c = file_->GetChar(); // read 'x' or 'X'
397 sb.AppendFormat("0%c", c);
398 bool err = true;
399
400 while (!file_->IsEof()) {
401 c = file_->PeekChar();
402 if (isdigit(c) || ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
403 sb.Append(c);
404 file_->GetChar();
405 err = false;
406 } else {
407 break;
408 }
409 }
410
411 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
412 token.value = sb.ToString();
413 }
414
ReadDecNum(Token & token)415 void Lexer::ReadDecNum(Token &token)
416 {
417 StringBuilder sb;
418 char c = file_->GetChar();
419 sb.Append(c);
420
421 while (!file_->IsEof()) {
422 c = file_->PeekChar();
423 if (!isdigit(c)) {
424 break;
425 }
426
427 sb.Append(c);
428 file_->GetChar();
429 }
430
431 token.kind = TokenType::NUM;
432 token.value = sb.ToString();
433 }
434
ReadNumSuffix(Token & token)435 void Lexer::ReadNumSuffix(Token &token)
436 {
437 while (!file_->IsEof()) {
438 char c = file_->PeekChar();
439 if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
440 token.value += c;
441 file_->GetChar();
442 } else {
443 break;
444 }
445 }
446 }
447
ReadShiftLeftOp(Token & token)448 void Lexer::ReadShiftLeftOp(Token &token)
449 {
450 char c = file_->GetChar();
451 char next = file_->PeekChar();
452 if (next == '<') {
453 file_->GetChar();
454 token.kind = TokenType::LEFT_SHIFT;
455 token.value = "<<";
456 return;
457 }
458
459 std::string symbol = StringHelper::Format("%c", c);
460 auto iter = symbols_.find(symbol);
461 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
462 token.value = symbol;
463 }
464
ReadShiftRightOp(Token & token)465 void Lexer::ReadShiftRightOp(Token &token)
466 {
467 char c = file_->GetChar();
468 char next = file_->PeekChar();
469 if (next == '>' && mode_ == ParseMode::EXPR_MODE) {
470 file_->GetChar();
471 token.kind = TokenType::RIGHT_SHIFT;
472 token.value = ">>";
473 return;
474 }
475
476 std::string symbol = StringHelper::Format("%c", c);
477 auto iter = symbols_.find(symbol);
478 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
479 token.value = symbol;
480 }
481
ReadPPlusOp(Token & token)482 void Lexer::ReadPPlusOp(Token &token)
483 {
484 char c = file_->GetChar();
485 char next = file_->PeekChar();
486 if (next == '+') {
487 file_->GetChar();
488 token.kind = TokenType::PPLUS;
489 token.value = "++";
490 return;
491 }
492
493 std::string symbol = StringHelper::Format("%c", c);
494 auto iter = symbols_.find(symbol);
495 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
496 token.value = symbol;
497 }
498
ReadMMinusOp(Token & token)499 void Lexer::ReadMMinusOp(Token &token)
500 {
501 char c = file_->GetChar();
502 char next = file_->PeekChar();
503 if (next == '-') {
504 file_->GetChar();
505 token.kind = TokenType::MMINUS;
506 token.value = "--";
507 return;
508 }
509
510 std::string symbol = StringHelper::Format("%c", c);
511 auto iter = symbols_.find(symbol);
512 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
513 token.value = symbol;
514 }
515
ReadComment(Token & token)516 void Lexer::ReadComment(Token &token)
517 {
518 char c = file_->GetChar();
519 char next = file_->PeekChar();
520 if (next == '/') {
521 ReadLineComment(token);
522 return;
523 } else if (next == '*') {
524 ReadBlockComment(token);
525 return;
526 }
527
528 std::string symbol = StringHelper::Format("%c", c);
529 auto iter = symbols_.find(symbol);
530 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
531 token.value = symbol;
532 }
533
ReadLineComment(Token & token)534 void Lexer::ReadLineComment(Token &token)
535 {
536 StringBuilder sb;
537 char c = file_->GetChar();
538 sb.AppendFormat("/%c", c);
539
540 while (!file_->IsEof()) {
541 c = file_->GetChar();
542 if (c == '\n') {
543 break;
544 }
545 sb.Append(c);
546 }
547
548 token.kind = TokenType::COMMENT_LINE;
549 token.value = sb.ToString();
550 }
551
ReadBlockComment(Token & token)552 void Lexer::ReadBlockComment(Token &token)
553 {
554 StringBuilder sb;
555 char c = file_->GetChar();
556 sb.AppendFormat("/%c", c);
557
558 while (!file_->IsEof()) {
559 c = file_->GetChar();
560 sb.Append(c);
561
562 if (c == '*' && file_->PeekChar() == '/') {
563 c = file_->GetChar();
564 sb.Append(c);
565 break;
566 }
567 }
568
569 token.kind = TokenType::COMMENT_BLOCK;
570 token.value = sb.ToString();
571 }
572
ReadSymbolToken(Token & token)573 void Lexer::ReadSymbolToken(Token &token)
574 {
575 char c = file_->GetChar();
576 std::string symbol = StringHelper::Format("%c", c);
577 auto iter = symbols_.find(symbol);
578 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
579 token.value = symbol;
580 }
581 } // namespace Idl
582 } // namespace OHOS