1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "lexer/lexer.h"
17 #include "util/logger.h"
18 #include "util/string_builder.h"
19 #include "util/string_helper.h"
20
21 namespace OHOS {
22 namespace Idl {
23 Lexer::StrTokenTypeMap Lexer::keyWords_ = {
24 {"void", TokenType::VOID },
25 {"boolean", TokenType::BOOLEAN },
26 {"byte", TokenType::BYTE },
27 {"short", TokenType::SHORT },
28 {"int", TokenType::INT },
29 {"long", TokenType::LONG },
30 {"String", TokenType::STRING },
31 {"String16", TokenType::STRING16 },
32 {"float", TokenType::FLOAT },
33 {"double", TokenType::DOUBLE },
34 {"FileDescriptor", TokenType::FD },
35 {"Ashmem", TokenType::ASHMEM },
36 {"NativeBuffer", TokenType::NATIVE_BUFFER},
37 {"Pointer", TokenType::POINTER },
38 {"List", TokenType::LIST },
39 {"Map", TokenType::MAP },
40 {"OrderedMap", TokenType::ORDEREDMAP },
41 {"SharedMemQueue", TokenType::SMQ },
42 {"char", TokenType::CHAR },
43 {"unsigned", TokenType::UNSIGNED },
44 {"enum", TokenType::ENUM },
45 {"struct", TokenType::STRUCT },
46 {"union", TokenType::UNION },
47 {"package", TokenType::PACKAGE },
48 {"interface_token", TokenType::INTERFACE_TOKEN },
49 {"support_delegator", TokenType::SUPPORT_DELEGATOR },
50 {"option_stub_hooks", TokenType::OPTION_STUB_HOOKS },
51 {"option_parcel_hooks", TokenType::OPTION_PARCEL_HOOKS },
52 {"sequenceable", TokenType::SEQ },
53 {"rawdata", TokenType::RAWDATA },
54 {"import", TokenType::IMPORT },
55 {"interface", TokenType::INTERFACE },
56 {"extends", TokenType::EXTENDS },
57 {"oneway", TokenType::ONEWAY },
58 {"customMsgOption", TokenType::CUSTOM_MSG_OPTION },
59 {"callback", TokenType::CALLBACK },
60 {"freezecontrol", TokenType::FREEZECONTROL},
61 {"full", TokenType::FULL },
62 {"lite", TokenType::LITE },
63 {"mini", TokenType::MINI },
64 {"cacheable", TokenType::CACHEABLE },
65 {"ipccode", TokenType::IPCCODE },
66 {"ipcincapacity", TokenType::IPC_IN_CAPACITY },
67 {"ipcoutcapacity", TokenType::IPC_OUT_CAPACITY },
68 {"macrodef", TokenType::MACRODEF },
69 {"macrondef", TokenType::MACRONDEF },
70 {"in", TokenType::IN },
71 {"out", TokenType::OUT },
72 {"inout", TokenType::INOUT },
73 };
74
75 Lexer::StrTokenTypeMap Lexer::symbols_ = {
76 {".", TokenType::DOT },
77 {",", TokenType::COMMA },
78 {":", TokenType::COLON },
79 {"=", TokenType::ASSIGN },
80 {";", TokenType::SEMICOLON },
81 {"{", TokenType::BRACES_LEFT },
82 {"}", TokenType::BRACES_RIGHT },
83 {"[", TokenType::BRACKETS_LEFT },
84 {"]", TokenType::BRACKETS_RIGHT },
85 {"(", TokenType::PARENTHESES_LEFT },
86 {")", TokenType::PARENTHESES_RIGHT },
87 {"<", TokenType::ANGLE_BRACKETS_LEFT },
88 {">", TokenType::ANGLE_BRACKETS_RIGHT},
89 {"+", TokenType::ADD },
90 {"-", TokenType::SUB },
91 {"*", TokenType::STAR },
92 {"/", TokenType::SLASH },
93 {"%", TokenType::PERCENT_SIGN },
94 {"<<", TokenType::LEFT_SHIFT },
95 {">>", TokenType::RIGHT_SHIFT },
96 {"&", TokenType::AND },
97 {"^", TokenType::XOR },
98 {"|", TokenType::OR },
99 {"~", TokenType::TILDE },
100 {"++", TokenType::PPLUS },
101 {"--", TokenType::MMINUS },
102 };
103
Lexer()104 Lexer::Lexer() : filePath_(), file_(nullptr), mode_(ParseMode::DECL_MODE), havePeek_(false), curToken_() {}
105
Reset(const std::string & filePath)106 bool Lexer::Reset(const std::string &filePath)
107 {
108 file_ = std::make_unique<File>(filePath, int(File::READ));
109 if (!file_->IsValid()) {
110 return false;
111 }
112
113 havePeek_ = false;
114 InitCurToken(curToken_);
115 return true;
116 }
117
PeekToken(bool skipComment)118 Token Lexer::PeekToken(bool skipComment)
119 {
120 if (!havePeek_) {
121 ReadToken(curToken_, skipComment);
122 havePeek_ = true;
123 }
124 return curToken_;
125 }
126
GetToken(bool skipComment)127 Token Lexer::GetToken(bool skipComment)
128 {
129 if (!havePeek_) {
130 ReadToken(curToken_, skipComment);
131 }
132 havePeek_ = false;
133 return curToken_;
134 }
135
SkipCurrentLine()136 void Lexer::SkipCurrentLine()
137 {
138 while (!file_->IsEof()) {
139 char c = file_->GetChar();
140 if (c == '\n') {
141 file_->GetChar();
142 break;
143 }
144 }
145 havePeek_ = false;
146 }
147
SkipCurrentLine(char untilChar)148 bool Lexer::SkipCurrentLine(char untilChar)
149 {
150 bool ret = true;
151 while (!file_->IsEof()) {
152 int c = file_->GetChar();
153 if (c == untilChar) {
154 ret = true;
155 break;
156 }
157 if (c == '\n') {
158 file_->GetChar();
159 ret = false;
160 break;
161 }
162 }
163 havePeek_ = false;
164 return ret;
165 }
166
Skip(char untilChar)167 void Lexer::Skip(char untilChar)
168 {
169 while (!file_->IsEof()) {
170 int c = file_->GetChar();
171 if (c == untilChar) {
172 break;
173 }
174 }
175 havePeek_ = false;
176 }
177
SkipToken(TokenType tokenType)178 void Lexer::SkipToken(TokenType tokenType)
179 {
180 while (curToken_.kind != tokenType && curToken_.kind != TokenType::END_OF_FILE) {
181 GetToken(false);
182 }
183 }
184
SkipUntilToken(TokenType tokenType)185 void Lexer::SkipUntilToken(TokenType tokenType)
186 {
187 Token token = PeekToken();
188 while (token.kind != tokenType) {
189 GetToken(false);
190 token = PeekToken();
191 }
192 }
193
SkipEof()194 void Lexer::SkipEof()
195 {
196 while (!file_->IsEof()) {}
197 havePeek_ = false;
198 }
199
ReadCacheableTime(Token & token)200 bool Lexer::ReadCacheableTime(Token &token)
201 {
202 bool ret = true;
203 StringBuilder sb;
204
205 while (!file_->IsEof()) {
206 char c = file_->PeekChar();
207 if (isspace(c)) {
208 file_->GetChar();
209 continue;
210 }
211 if (!isdigit(c)) {
212 if (c != ']' && c != ',') {
213 ret = false;
214 }
215 break;
216 }
217 sb.Append(c);
218 file_->GetChar();
219 }
220
221 if (ret == false) {
222 return ret;
223 }
224
225 token.value = sb.ToString();
226 if (token.value.empty()) {
227 return false;
228 }
229
230 return ret;
231 }
232
ReadToken(Token & token,bool skipComment)233 void Lexer::ReadToken(Token &token, bool skipComment)
234 {
235 if (!file_->IsEof()) {
236 InitCurToken(token);
237 }
238 while (!file_->IsEof()) {
239 char c = file_->PeekChar();
240 if (isspace(c)) {
241 file_->GetChar();
242 continue;
243 }
244 token.location.row = file_->GetCharLineNumber();
245 token.location.col = file_->GetCharColumnNumber();
246 if (isalpha(c) || c == '_' || (c == '.' && file_->NextChar() == '.')) {
247 ReadId(token);
248 return;
249 } else if (isdigit(c)) {
250 ReadNum(token);
251 return;
252 } else if (c == '<') {
253 ReadShiftLeftOp(token);
254 return;
255 } else if (c == '>') {
256 ReadShiftRightOp(token);
257 return;
258 } else if (c == '+') {
259 ReadPPlusOp(token);
260 return;
261 } else if (c == '-') {
262 ReadMMinusOp(token);
263 return;
264 } else if (c == '/') {
265 ReadComment(token);
266 if ((token.kind == TokenType::COMMENT_BLOCK || token.kind == TokenType::COMMENT_LINE) && skipComment) {
267 InitCurToken(token);
268 continue;
269 }
270 return;
271 }
272 ReadSymbolToken(token);
273 return;
274 }
275 token.kind = TokenType::END_OF_FILE;
276 token.value = "";
277 }
278
InitCurToken(Token & token)279 void Lexer::InitCurToken(Token &token)
280 {
281 token.kind = TokenType::UNKNOWN;
282 token.location.filePath = file_->GetPath();
283 token.location.row = 1;
284 token.location.col = 1;
285 token.value = "";
286 }
287
ReadId(Token & token)288 void Lexer::ReadId(Token &token)
289 {
290 char c = file_->GetChar();
291 StringBuilder sb;
292 sb.Append(c);
293 while (!file_->IsEof()) {
294 c = file_->PeekChar();
295 if (isalpha(c) || isdigit(c) || c == '_' || c == '.' || c == '/') {
296 c = file_->GetChar();
297 sb.Append(c);
298 continue;
299 }
300 if (isspace(c)) {
301 file_->GetChar();
302 }
303 break;
304 }
305
306 std::string key = sb.ToString();
307 auto it = keyWords_.find(key);
308 token.kind = (it != keyWords_.end()) ? it->second : TokenType::ID;
309 token.value = sb.ToString();
310 }
311
ReadNum(Token & token)312 void Lexer::ReadNum(Token &token)
313 {
314 char c = file_->PeekChar();
315 if (c == '0') {
316 file_->GetChar();
317 c = file_->PeekChar();
318 if (c == 'b' || c == 'B') {
319 // binary number
320 ReadBinaryNum(token);
321 } else if (isdigit(c)) {
322 // octal number
323 return ReadOctNum(token);
324 } else if (c == 'X' || c == 'x') {
325 // hexadecimal number
326 return ReadHexNum(token);
327 } else {
328 // decimal number 0
329 token.kind = TokenType::NUM;
330 token.value = "0";
331 }
332 } else {
333 ReadDecNum(token);
334 }
335 ReadNumSuffix(token);
336 }
337
ReadBinaryNum(Token & token)338 void Lexer::ReadBinaryNum(Token &token)
339 {
340 StringBuilder sb;
341 char c = file_->GetChar(); // read 'b' or 'B'
342 sb.AppendFormat("0%c", c);
343 bool err = true;
344
345 while (!file_->IsEof()) {
346 c = file_->PeekChar();
347 if (c == '0' || c == '1') {
348 sb.Append(c);
349 file_->GetChar();
350 err = false;
351 } else {
352 break;
353 }
354 }
355
356 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
357 token.value = sb.ToString();
358 }
359
ReadOctNum(Token & token)360 void Lexer::ReadOctNum(Token &token)
361 {
362 StringBuilder sb;
363 sb.Append("0");
364 bool err = false;
365
366 while (!file_->IsEof()) {
367 char c = file_->PeekChar();
368 if (!isdigit(c)) {
369 break;
370 }
371
372 if (!(c >= '0' && c <= '7')) {
373 err = true;
374 }
375 sb.Append(c);
376 file_->GetChar();
377 }
378
379 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
380 token.value = sb.ToString();
381 }
382
ReadHexNum(Token & token)383 void Lexer::ReadHexNum(Token &token)
384 {
385 StringBuilder sb;
386 char c = file_->GetChar(); // read 'x' or 'X'
387 sb.AppendFormat("0%c", c);
388 bool err = true;
389
390 while (!file_->IsEof()) {
391 c = file_->PeekChar();
392 if (isdigit(c) || ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
393 sb.Append(c);
394 file_->GetChar();
395 err = false;
396 } else {
397 break;
398 }
399 }
400
401 token.kind = err ? TokenType::UNKNOWN : TokenType::NUM;
402 token.value = sb.ToString();
403 }
404
ReadDecNum(Token & token)405 void Lexer::ReadDecNum(Token &token)
406 {
407 StringBuilder sb;
408 char c = file_->GetChar();
409 sb.Append(c);
410
411 while (!file_->IsEof()) {
412 c = file_->PeekChar();
413 if (!isdigit(c)) {
414 break;
415 }
416
417 sb.Append(c);
418 file_->GetChar();
419 }
420
421 token.kind = TokenType::NUM;
422 token.value = sb.ToString();
423 }
424
ReadNumSuffix(Token & token)425 void Lexer::ReadNumSuffix(Token &token)
426 {
427 while (!file_->IsEof()) {
428 char c = file_->PeekChar();
429 if (isalpha(c) || isdigit(c) || c == '_' || c == '.') {
430 token.value += c;
431 file_->GetChar();
432 } else {
433 break;
434 }
435 }
436 }
437
ReadShiftLeftOp(Token & token)438 void Lexer::ReadShiftLeftOp(Token &token)
439 {
440 char c = file_->GetChar();
441 char next = file_->PeekChar();
442 if (next == '<') {
443 file_->GetChar();
444 token.kind = TokenType::LEFT_SHIFT;
445 token.value = "<<";
446 return;
447 }
448
449 std::string symbol = StringHelper::Format("%c", c);
450 auto iter = symbols_.find(symbol);
451 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
452 token.value = symbol;
453 }
454
ReadShiftRightOp(Token & token)455 void Lexer::ReadShiftRightOp(Token &token)
456 {
457 char c = file_->GetChar();
458 char next = file_->PeekChar();
459 if (next == '>' && mode_ == ParseMode::EXPR_MODE) {
460 file_->GetChar();
461 token.kind = TokenType::RIGHT_SHIFT;
462 token.value = ">>";
463 return;
464 }
465
466 std::string symbol = StringHelper::Format("%c", c);
467 auto iter = symbols_.find(symbol);
468 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
469 token.value = symbol;
470 }
471
ReadPPlusOp(Token & token)472 void Lexer::ReadPPlusOp(Token &token)
473 {
474 char c = file_->GetChar();
475 char next = file_->PeekChar();
476 if (next == '+') {
477 file_->GetChar();
478 token.kind = TokenType::PPLUS;
479 token.value = "++";
480 return;
481 }
482
483 std::string symbol = StringHelper::Format("%c", c);
484 auto iter = symbols_.find(symbol);
485 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
486 token.value = symbol;
487 }
488
ReadMMinusOp(Token & token)489 void Lexer::ReadMMinusOp(Token &token)
490 {
491 char c = file_->GetChar();
492 char next = file_->PeekChar();
493 if (next == '-') {
494 file_->GetChar();
495 token.kind = TokenType::MMINUS;
496 token.value = "--";
497 return;
498 }
499
500 std::string symbol = StringHelper::Format("%c", c);
501 auto iter = symbols_.find(symbol);
502 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
503 token.value = symbol;
504 }
505
ReadComment(Token & token)506 void Lexer::ReadComment(Token &token)
507 {
508 char c = file_->GetChar();
509 char next = file_->PeekChar();
510 if (next == '/') {
511 ReadLineComment(token);
512 return;
513 } else if (next == '*') {
514 ReadBlockComment(token);
515 return;
516 }
517
518 std::string symbol = StringHelper::Format("%c", c);
519 auto iter = symbols_.find(symbol);
520 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
521 token.value = symbol;
522 }
523
ReadLineComment(Token & token)524 void Lexer::ReadLineComment(Token &token)
525 {
526 StringBuilder sb;
527 char c = file_->GetChar();
528 sb.AppendFormat("/%c", c);
529
530 while (!file_->IsEof()) {
531 c = file_->GetChar();
532 if (c == '\n') {
533 break;
534 }
535 sb.Append(c);
536 }
537
538 token.kind = TokenType::COMMENT_LINE;
539 token.value = sb.ToString();
540 }
541
ReadBlockComment(Token & token)542 void Lexer::ReadBlockComment(Token &token)
543 {
544 StringBuilder sb;
545 char c = file_->GetChar();
546 sb.AppendFormat("/%c", c);
547
548 while (!file_->IsEof()) {
549 c = file_->GetChar();
550 sb.Append(c);
551
552 if (c == '*' && file_->PeekChar() == '/') {
553 c = file_->GetChar();
554 sb.Append(c);
555 break;
556 }
557 }
558
559 token.kind = TokenType::COMMENT_BLOCK;
560 token.value = sb.ToString();
561 }
562
ReadSymbolToken(Token & token)563 void Lexer::ReadSymbolToken(Token &token)
564 {
565 char c = file_->GetChar();
566 std::string symbol = StringHelper::Format("%c", c);
567 auto iter = symbols_.find(symbol);
568 token.kind = (iter != symbols_.end()) ? iter->second : TokenType::UNKNOWN;
569 token.value = symbol;
570 }
571 } // namespace Idl
572 } // namespace OHOS