1 /**
2 * Copyright (c) 2024-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "lexer.h"
17
18 #include "generated/keywords.h"
19
20 #include <public/public.h>
21
22 namespace ark::es2panda::lexer {
LexerPosition(const util::StringView & source)23 LexerPosition::LexerPosition(const util::StringView &source) : iterator_(source) {}
24
Lexer(const parser::ParserContext * parserContext,util::DiagnosticEngine & diagnosticEngine,bool startLexer)25 Lexer::Lexer(const parser::ParserContext *parserContext, util::DiagnosticEngine &diagnosticEngine, bool startLexer)
26 : allocator_(parserContext->GetProgram()->Allocator()),
27 parserContext_(parserContext),
28 source_(parserContext->GetProgram()->SourceCode()),
29 pos_(source_),
30 diagnosticEngine_(diagnosticEngine)
31 {
32 // It is necessary to set the position of the first token manually, because by default it is filled with an empty
33 // value
34 pos_.token_.loc_.start = SourcePosition {Iterator().Index(), pos_.line_, parserContext_->GetProgram()};
35 if (startLexer) {
36 SkipWhiteSpaces();
37 }
38 }
39
ScanUnicodeEscapeSequence()40 char32_t Lexer::ScanUnicodeEscapeSequence()
41 {
42 ES2PANDA_ASSERT(Iterator().Peek() == LEX_CHAR_LOWERCASE_U);
43 auto constexpr UNICODE_ESCAPE_SEQUENCE_LENGTH = 4;
44
45 Iterator().Forward(1);
46
47 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
48 Iterator().Forward(1);
49 return ScanUnicodeCodePointEscape();
50 }
51
52 if (parserContext_->GetProgram()->Extension() == ScriptExtension::AS) {
53 return ScanHexEscape<UNICODE_ESCAPE_SEQUENCE_LENGTH, true>();
54 }
55
56 return ScanHexEscape<UNICODE_ESCAPE_SEQUENCE_LENGTH>();
57 }
58
59 // '\u{...}' escape sequence should have at least one hex digit inside brackets!
ScanUnicodeCodePointEscape()60 char32_t Lexer::ScanUnicodeCodePointEscape()
61 {
62 char32_t code = 0;
63 char32_t cp = Iterator().Peek();
64 if (!IsHexDigit(cp)) {
65 LogError(diagnostic::HEXADECIMAL_EXPECTED);
66 code = UNICODE_INVALID_CP;
67 }
68
69 while (true) {
70 Iterator().Forward(1);
71
72 constexpr auto MULTIPLIER = 16;
73 code = code * MULTIPLIER + HexValue(cp);
74 if (code > UNICODE_CODE_POINT_MAX) {
75 LogError(diagnostic::INVALID_UNICODE_ESCAPE);
76 code = UNICODE_INVALID_CP;
77 break;
78 }
79
80 cp = Iterator().Peek();
81 if (!IsHexDigit(cp)) {
82 break;
83 }
84 }
85
86 if (cp != LEX_CHAR_RIGHT_BRACE) {
87 LogError(diagnostic::INVALID_UNICODE_ESCAPE);
88 code = UNICODE_INVALID_CP;
89 }
90
91 Iterator().Forward(1);
92 return code;
93 }
94
Allocator()95 ArenaAllocator *Lexer::Allocator()
96 {
97 return allocator_;
98 }
99
GetToken()100 Token &Lexer::GetToken()
101 {
102 return pos_.token_;
103 }
104
GetToken() const105 const Token &Lexer::GetToken() const
106 {
107 return pos_.token_;
108 }
109
// Returns the line number recorded in the current lexer position.
size_t Lexer::Line() const
{
    const size_t currentLine = pos_.line_;
    return currentLine;
}
114
GetProgram() const115 const parser::Program *Lexer::GetProgram() const
116 {
117 return parserContext_->GetProgram();
118 }
119
Save() const120 LexerPosition Lexer::Save() const
121 {
122 return pos_;
123 }
124
// Retypes the current token and repositions the iterator `offset` units before the
// token's end index. The pending new-line counter is cleared.
void Lexer::BackwardToken(TokenType type, size_t offset)
{
    pos_.token_.type_ = type;

    const auto rewoundIndex = GetToken().End().index - offset;
    pos_.iterator_.Reset(rewoundIndex);

    pos_.nextTokenLine_ = 0;
}
131
// Retypes the current token, advances the iterator by `offset` units and then skips
// any white space that follows.
void Lexer::ForwardToken(TokenType type, size_t offset)
{
    auto &position = pos_;
    position.token_.type_ = type;
    position.iterator_.Forward(offset);

    SkipWhiteSpaces();
}
138
ForwardToken(TokenType type)139 void Lexer::ForwardToken(TokenType type)
140 {
141 pos_.token_.type_ = type;
142 pos_.iterator_.Forward(1);
143 }
144
Rewind(const LexerPosition & pos)145 void Lexer::Rewind(const LexerPosition &pos)
146 {
147 pos_ = pos;
148 }
149
Lookahead()150 char32_t Lexer::Lookahead()
151 {
152 return Iterator().Peek();
153 }
154
// Returns the iterator's current index into the source.
size_t Lexer::GetIndex()
{
    const size_t currentIndex = Iterator().Index();
    return currentIndex;
}
159
// Returns the part of the source delimited by the indices of the two iterators.
util::StringView Lexer::SourceView(const util::StringView::Iterator &begin, const util::StringView::Iterator &end) const
{
    const auto firstIndex = begin.Index();
    const auto lastIndex = end.Index();
    return SourceView(firstIndex, lastIndex);
}
164
// Returns the result of source_.Substr(begin, end), i.e. a view into the lexed source.
util::StringView Lexer::SourceView(size_t begin, size_t end) const
{
    util::StringView slice = source_.Substr(begin, end);
    return slice;
}
169
SkipMultiLineComment()170 void Lexer::SkipMultiLineComment()
171 {
172 while (true) {
173 switch (Iterator().Next()) {
174 case util::StringView::Iterator::INVALID_CP: {
175 LogError(diagnostic::UNTERMINATED_MULTI_LINE_COMMENT);
176 return;
177 }
178 case LEX_CHAR_CR: {
179 if (Iterator().Peek() == LEX_CHAR_LF) {
180 Iterator().Forward(1);
181 }
182 [[fallthrough]];
183 }
184 case LEX_CHAR_LF:
185 case LEX_CHAR_LS:
186 case LEX_CHAR_PS: {
187 pos_.nextTokenLine_++;
188 continue;
189 }
190 case LEX_CHAR_ASTERISK: {
191 if (Iterator().Peek() == LEX_CHAR_SLASH) {
192 Iterator().Forward(1);
193 return;
194 }
195
196 break;
197 }
198 default: {
199 break;
200 }
201 }
202 }
203 }
204
205 /* New line character is not processed */
SkipSingleLineComment()206 void Lexer::SkipSingleLineComment()
207 {
208 while (true) {
209 switch (Iterator().Next()) {
210 case util::StringView::Iterator::INVALID_CP:
211 case LEX_CHAR_CR: {
212 if (Iterator().Peek() == LEX_CHAR_LF) {
213 Iterator().Forward(1);
214 }
215
216 [[fallthrough]];
217 }
218 case LEX_CHAR_LF:
219 case LEX_CHAR_LS:
220 case LEX_CHAR_PS: {
221 pos_.nextTokenLine_++;
222 return;
223 }
224 default: {
225 break;
226 }
227 }
228 }
229 }
230
LogUnexpectedToken(lexer::TokenType const tokenType) const231 void Lexer::LogUnexpectedToken(lexer::TokenType const tokenType) const
232 {
233 LogError(diagnostic::UNEXPECTED_TOKEN_PARAM, {TokenToString(tokenType)});
234 }
235
LogError(const diagnostic::DiagnosticKind & diagnostic,const util::DiagnosticMessageParams & diagnosticParams,const lexer::SourcePosition & pos) const236 void Lexer::LogError(const diagnostic::DiagnosticKind &diagnostic,
237 const util::DiagnosticMessageParams &diagnosticParams, const lexer::SourcePosition &pos) const
238 {
239 diagnosticEngine_.LogDiagnostic(diagnostic, diagnosticParams, pos);
240 }
241
LogError(const diagnostic::DiagnosticKind & diagnostic,const util::DiagnosticMessageParams & diagnosticParams) const242 void Lexer::LogError(const diagnostic::DiagnosticKind &diagnostic,
243 const util::DiagnosticMessageParams &diagnosticParams) const
244 {
245 LogError(diagnostic, diagnosticParams, GetToken().Start());
246 }
247
CheckNumberLiteralEnd()248 void Lexer::CheckNumberLiteralEnd()
249 {
250 if (Iterator().Peek() == LEX_CHAR_LOWERCASE_N) {
251 GetToken().flags_ |= TokenFlags::NUMBER_BIGINT;
252 GetToken().src_ = SourceView(GetToken().Start().index, Iterator().Index());
253 Iterator().Forward(1);
254 } else {
255 GetToken().src_ = SourceView(GetToken().Start().index, Iterator().Index());
256 }
257
258 const auto nextCp = Iterator().PeekCp();
259 if (IsDecimalDigit(nextCp)) {
260 LogError(diagnostic::INVALID_NUMERIC_LIT);
261 return;
262 }
263 CheckNumberLiteralEndForIdentifier();
264 }
265
CheckNumberLiteralEndForIdentifier()266 void Lexer::CheckNumberLiteralEndForIdentifier()
267 {
268 // This check is needed only in Ecmascript
269 const auto nextCp = Iterator().PeekCp();
270 if (KeywordsUtil::IsIdentifierStart(nextCp)) {
271 LogError(diagnostic::INVALID_NUMERIC_LIT);
272 }
273 }
274
ScanDecimalNumbers()275 void Lexer::ScanDecimalNumbers()
276 {
277 bool allowNumericOnNext = true;
278
279 while (true) {
280 switch (Iterator().Peek()) {
281 case LEX_CHAR_0:
282 case LEX_CHAR_1:
283 case LEX_CHAR_2:
284 case LEX_CHAR_3:
285 case LEX_CHAR_4:
286 case LEX_CHAR_5:
287 case LEX_CHAR_6:
288 case LEX_CHAR_7:
289 case LEX_CHAR_8:
290 case LEX_CHAR_9: {
291 Iterator().Forward(1);
292 allowNumericOnNext = true;
293 break;
294 }
295 case LEX_CHAR_UNDERSCORE: {
296 Iterator().Backward(1);
297
298 if (Iterator().Peek() == LEX_CHAR_DOT || !allowNumericOnNext) {
299 Iterator().Forward(1);
300 LogError(diagnostic::INVALID_NUMERIC_SEP);
301 }
302
303 GetToken().flags_ |= TokenFlags::NUMBER_HAS_UNDERSCORE;
304 Iterator().Forward(2U);
305 allowNumericOnNext = false;
306 break;
307 }
308 default: {
309 if (!allowNumericOnNext) {
310 LogError(diagnostic::INVALID_NUMERIC_SEP_AT_END_OF_NUM);
311 }
312 return;
313 }
314 }
315 }
316 }
317
ConvertNumber(NumberFlags flags)318 void Lexer::ConvertNumber([[maybe_unused]] NumberFlags flags)
319 {
320 ConversionResult res;
321 const long double temp = StrToNumeric(&std::strtold, GetToken().src_.Utf8().data(), res);
322 if (res == ConversionResult::SUCCESS) {
323 GetToken().number_ = Number(GetToken().src_, static_cast<double>(temp));
324 } else if (res == ConversionResult::INVALID_ARGUMENT) {
325 LogError(diagnostic::INVALID_NUM);
326 } else if (res == ConversionResult::OUT_OF_RANGE) {
327 GetToken().number_ = Number(GetToken().src_, std::numeric_limits<double>::infinity());
328 }
329 }
330
ScanNumber(bool const leadingMinus,bool allowBigInt)331 void Lexer::ScanNumber(bool const leadingMinus, bool allowBigInt)
332 {
333 const bool isPeriod = GetToken().type_ == TokenType::PUNCTUATOR_PERIOD;
334 GetToken().type_ = TokenType::LITERAL_NUMBER;
335 GetToken().keywordType_ = TokenType::LITERAL_NUMBER;
336
337 if (!isPeriod) {
338 ScanDecimalNumbers();
339 }
340
341 bool parseExponent = true;
342 auto flags = NumberFlags::NONE;
343
344 if (Iterator().Peek() == LEX_CHAR_DOT || isPeriod) {
345 flags |= NumberFlags::DECIMAL_POINT;
346 allowBigInt = false;
347 if (!isPeriod) {
348 Iterator().Forward(1);
349 }
350
351 auto cp = Iterator().Peek();
352 if (IsDecimalDigit(cp) || cp == LEX_CHAR_LOWERCASE_E || cp == LEX_CHAR_UPPERCASE_E) {
353 ScanDecimalNumbers();
354 } else {
355 parseExponent = false;
356 }
357 }
358
359 auto const signPosition = ScanCharLex(parseExponent, allowBigInt, flags);
360
361 CheckNumberLiteralEnd();
362
363 if ((GetToken().flags_ & TokenFlags::NUMBER_BIGINT) != 0) {
364 if (!allowBigInt) {
365 LogError(diagnostic::INVALID_BIGINT);
366 }
367
368 return;
369 }
370
371 util::StringView sv = SourceView(GetToken().Start().index, Iterator().Index());
372 std::string utf8 = !leadingMinus ? std::string {sv.Utf8()} : '-' + std::string {sv.Utf8()};
373 bool needConversion = leadingMinus;
374
375 if (signPosition) {
376 utf8.insert(*signPosition + (!leadingMinus ? 0U : 1U), 1U, '+');
377 needConversion = true;
378 }
379
380 if ((GetToken().flags_ & TokenFlags::NUMBER_HAS_UNDERSCORE) != 0U) {
381 utf8.erase(std::remove(utf8.begin(), utf8.end(), LEX_CHAR_UNDERSCORE), utf8.end());
382 needConversion = true;
383 }
384
385 GetToken().src_ = needConversion ? util::UString(utf8, Allocator()).View() : sv;
386
387 ConvertNumber(flags);
388 }
389
ScanCharLex(bool const parseExponent,bool & allowBigInt,NumberFlags & flags)390 std::optional<std::size_t> Lexer::ScanCharLex(bool const parseExponent, bool &allowBigInt, NumberFlags &flags)
391 {
392 std::optional<std::size_t> rc {};
393
394 if (auto const ch = Iterator().Peek(); ch == LEX_CHAR_LOWERCASE_E || ch == LEX_CHAR_UPPERCASE_E) {
395 allowBigInt = false;
396
397 if (parseExponent) {
398 flags |= NumberFlags::EXPONENT;
399
400 Iterator().Forward(1);
401
402 rc = ScanSignOfNumber();
403
404 if (!IsDecimalDigit(Iterator().Peek())) {
405 LogError(diagnostic::INVALID_NUMERIC_LIT);
406 }
407 ScanDecimalNumbers();
408 }
409 }
410
411 return rc;
412 }
413
ScanSignOfNumber()414 std::optional<std::size_t> Lexer::ScanSignOfNumber() noexcept
415 {
416 switch (Iterator().Peek()) {
417 case LEX_CHAR_UNDERSCORE: {
418 break;
419 }
420 case LEX_CHAR_PLUS:
421 case LEX_CHAR_MINUS: {
422 Iterator().Forward(1);
423 break;
424 }
425 default: {
426 return std::make_optional(Iterator().Index() - GetToken().Start().index);
427 }
428 }
429 return std::nullopt;
430 }
431
PushTemplateContext(TemplateLiteralParserContext * ctx)432 void Lexer::PushTemplateContext(TemplateLiteralParserContext *ctx)
433 {
434 tlCtx_ = ctx;
435 }
436
ScanTemplateStringEnd()437 void Lexer::ScanTemplateStringEnd()
438 {
439 if (Iterator().Peek() == LEX_CHAR_BACK_TICK) {
440 Iterator().Forward(1);
441 SetTokenEnd();
442 SkipWhiteSpaces();
443 } else {
444 LogError(diagnostic::UNEXPECTED_TOKEN_EXPECTED_PARAM, {TokenToString(TokenType::PUNCTUATOR_BACK_TICK)});
445 }
446 }
447
CheckOctalDigit(char32_t const nextCp)448 bool Lexer::CheckOctalDigit(char32_t const nextCp)
449 {
450 if (IsOctalDigit(nextCp)) {
451 Iterator().Forward(1);
452
453 if (Iterator().Peek() != LEX_CHAR_BACK_TICK) {
454 LogError(diagnostic::OCTAL_ESCAPE_IN_TEMPLATE_STRINGS);
455 return false;
456 }
457
458 Iterator().Backward(1);
459 }
460 return true;
461 }
462
ScanTemplateStringCpHelper(char32_t cp,LexerTemplateString templateStr)463 std::tuple<bool, bool, LexerTemplateString> Lexer::ScanTemplateStringCpHelper(char32_t cp,
464 LexerTemplateString templateStr)
465 {
466 switch (cp) {
467 case util::StringView::Iterator::INVALID_CP:
468 LogError(diagnostic::UNEXPECTED_TOKEN_EXPECTED_BACKTICK_OR_DOLLAR_LBRACE);
469 return {true, false, templateStr};
470 case LEX_CHAR_BACK_TICK:
471 templateStr.end = Iterator().Index();
472 return {true, false, templateStr};
473 case LEX_CHAR_CR: {
474 Iterator().Forward(1);
475
476 if (Iterator().Peek() != LEX_CHAR_LF) {
477 Iterator().Backward(1);
478 }
479
480 [[fallthrough]];
481 }
482 case LEX_CHAR_LF:
483 pos_.line_++;
484 templateStr.str.Append(LEX_CHAR_LF);
485 Iterator().Forward(1);
486 return {false, true, templateStr};
487 case LEX_CHAR_BACKSLASH: {
488 Iterator().Forward(1);
489
490 char32_t nextCp = Iterator().Peek();
491 templateStr.validSequence = CheckOctalDigit(nextCp);
492
493 if (nextCp == LEX_CHAR_BACK_TICK || nextCp == LEX_CHAR_BACKSLASH || nextCp == LEX_CHAR_DOLLAR_SIGN) {
494 templateStr.str.Append(cp);
495 templateStr.str.Append(nextCp);
496 Iterator().Forward(1);
497 return {false, true, templateStr};
498 }
499
500 Iterator().Backward(1);
501 return {false, false, templateStr};
502 }
503 case LEX_CHAR_DOLLAR_SIGN:
504 templateStr.end = Iterator().Index();
505 Iterator().Forward(1);
506
507 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
508 Iterator().Forward(1);
509 templateStr.scanExpression = true;
510 SkipWhiteSpaces();
511 return {true, false, templateStr};
512 }
513
514 templateStr.str.Append(cp);
515 return {false, true, templateStr};
516 default:
517 return {false, false, templateStr};
518 }
519 return {false, false, templateStr};
520 }
521
ScanTemplateString()522 LexerTemplateString Lexer::ScanTemplateString()
523 {
524 LexerTemplateString templateStr(Allocator());
525 size_t cpSize = 0U;
526
527 while (true) {
528 char32_t cp = Iterator().PeekCp(&cpSize);
529
530 bool isReturn = false;
531 bool isContinue = false;
532 std::tie(isReturn, isContinue, templateStr) = ScanTemplateStringCpHelper(cp, templateStr);
533 if (isReturn) {
534 return templateStr;
535 }
536 if (isContinue) {
537 continue;
538 }
539
540 templateStr.str.Append(cp);
541 Iterator().Forward(cpSize);
542 }
543
544 ES2PANDA_UNREACHABLE();
545 return templateStr;
546 }
547
ScanMultilineString()548 util::StringView Lexer::ScanMultilineString()
549 {
550 util::UString str(Allocator());
551 size_t cpSize = 0U;
552 bool isreturn = false;
553
554 while (!isreturn) {
555 char32_t cp = Iterator().PeekCp(&cpSize);
556 switch (cp) {
557 case util::StringView::Iterator::INVALID_CP:
558 LogError(diagnostic::UNEXPECTED_TOKEN_EXPECTED_PARAM, {TokenToString(TokenType::PUNCTUATOR_BACK_TICK)});
559 [[fallthrough]];
560 case LEX_CHAR_BACK_TICK:
561 isreturn = true;
562 break;
563 case LEX_CHAR_CR: {
564 Iterator().Forward(1);
565 if (Iterator().Peek() != LEX_CHAR_LF) {
566 Iterator().Backward(1);
567 }
568 [[fallthrough]];
569 }
570 case LEX_CHAR_LF:
571 pos_.line_++;
572 str.Append(LEX_CHAR_LF);
573 Iterator().Forward(1);
574 continue;
575 case LEX_CHAR_BACKSLASH: {
576 Iterator().Forward(1);
577 char32_t nextCp = ScanUnicodeCharacter();
578 str.Append(nextCp);
579 continue;
580 }
581 default: {
582 break;
583 }
584 }
585
586 if (isreturn) {
587 return str.View();
588 }
589
590 str.Append(cp);
591 Iterator().Forward(cpSize);
592 }
593
594 ES2PANDA_UNREACHABLE();
595 return str.View();
596 }
597
ResetTokenEnd()598 void Lexer::ResetTokenEnd()
599 {
600 SetTokenStart();
601 pos_.iterator_.Reset(GetToken().End().index);
602 pos_.line_ = GetToken().End().line;
603 pos_.nextTokenLine_ = 0;
604 }
605
ScanStringUnicodePart(util::UString * str)606 bool Lexer::ScanStringUnicodePart(util::UString *str)
607 {
608 const auto savedLine = Pos().Line();
609 const auto cp = ScanUnicodeCharacter();
610 if (Pos().Line() > savedLine) {
611 return true;
612 }
613
614 if (cp == util::StringView::Iterator::INVALID_CP) {
615 return false;
616 }
617
618 str->Append(cp);
619 return true;
620 }
621
ScanUnicodeCharacterHelper(size_t cpSize,char32_t cp)622 char32_t Lexer::ScanUnicodeCharacterHelper(size_t cpSize, char32_t cp)
623 {
624 Iterator().Forward(cpSize);
625 return cp;
626 }
627
628 // CC-OFFNXT(huge_method,G.FUN.01) big switch-case, solid logic
ScanUnicodeCharacter()629 char32_t Lexer::ScanUnicodeCharacter()
630 {
631 size_t cpSize {};
632 char32_t cp = Iterator().PeekCp(&cpSize);
633
634 switch (cp) {
635 case util::StringView::Iterator::INVALID_CP:
636 LogError(diagnostic::UNTERMINATED_STRING);
637 break;
638 case LEX_CHAR_CR:
639 Iterator().Forward(1);
640 if (Iterator().Peek() != LEX_CHAR_LF) {
641 Iterator().Backward(1);
642 }
643
644 [[fallthrough]];
645 case LEX_CHAR_LS:
646 case LEX_CHAR_PS:
647 case LEX_CHAR_LF:
648 pos_.line_++;
649 return ScanUnicodeCharacterHelper(cpSize, util::StringView::Iterator::INVALID_CP);
650 case LEX_CHAR_LOWERCASE_B:
651 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_BS);
652 case LEX_CHAR_LOWERCASE_T:
653 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_TAB);
654 case LEX_CHAR_LOWERCASE_N:
655 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_LF);
656 case LEX_CHAR_LOWERCASE_V:
657 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_VT);
658 case LEX_CHAR_LOWERCASE_F:
659 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_FF);
660 case LEX_CHAR_LOWERCASE_R:
661 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_CR);
662 case LEX_CHAR_LOWERCASE_X:
663 Iterator().Forward(1);
664 return ScanHexEscape<2U>();
665 case LEX_CHAR_LOWERCASE_U:
666 return ScanUnicodeEscapeSequence();
667 case LEX_CHAR_0: {
668 Iterator().Forward(1);
669 bool isDecimal = IsDecimalDigit(Iterator().Peek());
670 Iterator().Backward(1);
671
672 if (!isDecimal) {
673 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_NULL);
674 }
675
676 [[fallthrough]];
677 }
678 default:
679 if (IsDecimalDigit(Iterator().Peek())) {
680 LogError(diagnostic::INVALID_CHAR_ESCAPE);
681 cp = UNICODE_INVALID_CP;
682 }
683 break;
684 }
685
686 return ScanUnicodeCharacterHelper(cpSize, cp);
687 }
688
ScanQuestionPunctuator()689 void Lexer::ScanQuestionPunctuator()
690 {
691 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_MARK;
692
693 switch (Iterator().Peek()) {
694 case LEX_CHAR_QUESTION: {
695 GetToken().type_ = TokenType::PUNCTUATOR_NULLISH_COALESCING;
696 Iterator().Forward(1);
697
698 switch (Iterator().Peek()) {
699 case LEX_CHAR_EQUALS: {
700 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_NULLISH_EQUAL;
701 Iterator().Forward(1);
702 break;
703 }
704 default: {
705 break;
706 }
707 }
708
709 break;
710 }
711 case LEX_CHAR_DOT: {
712 Iterator().Forward(1);
713
714 if (!IsDecimalDigit(Iterator().Peek())) {
715 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_DOT;
716 return;
717 }
718
719 Iterator().Backward(1);
720 break;
721 }
722 default: {
723 break;
724 }
725 }
726 }
727
ScanLessThanPunctuator()728 void Lexer::ScanLessThanPunctuator()
729 {
730 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN;
731
732 switch (Iterator().Peek()) {
733 case LEX_CHAR_LESS_THAN: {
734 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT;
735 Iterator().Forward(1);
736
737 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
738 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT_EQUAL;
739 Iterator().Forward(1);
740 }
741 break;
742 }
743 case LEX_CHAR_EQUALS: {
744 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN_EQUAL;
745 Iterator().Forward(1);
746 break;
747 }
748 default: {
749 break;
750 }
751 }
752 }
753
ScanGreaterThanPunctuator()754 void Lexer::ScanGreaterThanPunctuator()
755 {
756 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN;
757
758 switch (Iterator().Peek()) {
759 case LEX_CHAR_GREATER_THAN: {
760 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT;
761 Iterator().Forward(1);
762
763 switch (Iterator().Peek()) {
764 case LEX_CHAR_GREATER_THAN: {
765 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT;
766 Iterator().Forward(1);
767
768 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
769 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT_EQUAL;
770 Iterator().Forward(1);
771 }
772 break;
773 }
774 case LEX_CHAR_EQUALS: {
775 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT_EQUAL;
776 Iterator().Forward(1);
777 break;
778 }
779 default: {
780 break;
781 }
782 }
783 break;
784 }
785 case LEX_CHAR_EQUALS: {
786 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN_EQUAL;
787 Iterator().Forward(1);
788 break;
789 }
790 default: {
791 break;
792 }
793 }
794 }
795
ScanEqualsPunctuator()796 void Lexer::ScanEqualsPunctuator()
797 {
798 GetToken().type_ = TokenType::PUNCTUATOR_SUBSTITUTION;
799
800 switch (Iterator().Peek()) {
801 case LEX_CHAR_EQUALS: {
802 GetToken().type_ = TokenType::PUNCTUATOR_EQUAL;
803 Iterator().Forward(1);
804
805 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
806 GetToken().type_ = TokenType::PUNCTUATOR_STRICT_EQUAL;
807 Iterator().Forward(1);
808 }
809 break;
810 }
811 case LEX_CHAR_GREATER_THAN: {
812 GetToken().type_ = TokenType::PUNCTUATOR_ARROW;
813 Iterator().Forward(1);
814 break;
815 }
816 default: {
817 break;
818 }
819 }
820 }
821
ScanExclamationPunctuator()822 void Lexer::ScanExclamationPunctuator()
823 {
824 GetToken().type_ = TokenType::PUNCTUATOR_EXCLAMATION_MARK;
825
826 switch (Iterator().Peek()) {
827 case LEX_CHAR_EQUALS: {
828 GetToken().type_ = TokenType::PUNCTUATOR_NOT_EQUAL;
829 Iterator().Forward(1);
830
831 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
832 GetToken().type_ = TokenType::PUNCTUATOR_NOT_STRICT_EQUAL;
833 Iterator().Forward(1);
834 }
835 break;
836 }
837 default: {
838 break;
839 }
840 }
841 }
842
ScanAmpersandPunctuator()843 void Lexer::ScanAmpersandPunctuator()
844 {
845 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND;
846
847 switch (Iterator().Peek()) {
848 case LEX_CHAR_AMPERSAND: {
849 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND;
850 Iterator().Forward(1);
851
852 switch (Iterator().Peek()) {
853 case LEX_CHAR_EQUALS: {
854 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND_EQUAL;
855 Iterator().Forward(1);
856 break;
857 }
858 default: {
859 break;
860 }
861 }
862
863 break;
864 }
865 case LEX_CHAR_EQUALS: {
866 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND_EQUAL;
867 Iterator().Forward(1);
868 break;
869 }
870 default: {
871 break;
872 }
873 }
874 }
875
ScanAtPunctuator()876 void Lexer::ScanAtPunctuator()
877 {
878 GetToken().type_ = TokenType::PUNCTUATOR_AT;
879
880 if (Iterator().Peek() == LEX_CHAR_AT) {
881 GetToken().type_ = TokenType::PUNCTUATOR_FORMAT;
882 Iterator().Forward(1U);
883 }
884 }
885
ScanVLinePunctuator()886 void Lexer::ScanVLinePunctuator()
887 {
888 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR;
889
890 switch (Iterator().Peek()) {
891 case LEX_CHAR_VLINE: {
892 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR;
893 Iterator().Forward(1);
894
895 switch (Iterator().Peek()) {
896 case LEX_CHAR_EQUALS: {
897 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR_EQUAL;
898 Iterator().Forward(1);
899 break;
900 }
901 default: {
902 break;
903 }
904 }
905
906 break;
907 }
908 case LEX_CHAR_EQUALS: {
909 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR_EQUAL;
910 Iterator().Forward(1);
911 break;
912 }
913 default: {
914 break;
915 }
916 }
917 }
918
ScanCircumflexPunctuator()919 void Lexer::ScanCircumflexPunctuator()
920 {
921 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR;
922
923 switch (Iterator().Peek()) {
924 case LEX_CHAR_EQUALS: {
925 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR_EQUAL;
926 Iterator().Forward(1);
927 break;
928 }
929 default: {
930 break;
931 }
932 }
933 }
934
ScanPlusPunctuator()935 void Lexer::ScanPlusPunctuator()
936 {
937 GetToken().type_ = TokenType::PUNCTUATOR_PLUS;
938
939 switch (Iterator().Peek()) {
940 case LEX_CHAR_PLUS: {
941 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_PLUS;
942 Iterator().Forward(1);
943 break;
944 }
945 case LEX_CHAR_EQUALS: {
946 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_EQUAL;
947 Iterator().Forward(1);
948 break;
949 }
950 default: {
951 break;
952 }
953 }
954 }
955
ScanMinusPunctuator()956 void Lexer::ScanMinusPunctuator()
957 {
958 GetToken().type_ = TokenType::PUNCTUATOR_MINUS;
959
960 switch (Iterator().Peek()) {
961 case LEX_CHAR_MINUS: {
962 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_MINUS;
963 Iterator().Forward(1);
964 break;
965 }
966 case LEX_CHAR_EQUALS: {
967 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_EQUAL;
968 Iterator().Forward(1);
969 break;
970 }
971 default: {
972 break;
973 }
974 }
975 }
976
ScanSlashPunctuator()977 void Lexer::ScanSlashPunctuator()
978 {
979 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE;
980 auto cp = Iterator().Peek();
981 if (cp == LEX_CHAR_EQUALS) {
982 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE_EQUAL;
983 Iterator().Forward(1);
984 }
985
986 Iterator().Backward(1);
987 if (!IsValidJsDocStart(&cp)) {
988 Iterator().Forward(1);
989 return;
990 }
991 Iterator().Forward(JS_DOC_START_SIZE + 1);
992 GetToken().type_ = TokenType::JS_DOC_START;
993 pos_.nextTokenLine_ += 1;
994 }
995
ScanDotPunctuator(KeywordsUtil & kwu)996 void Lexer::ScanDotPunctuator(KeywordsUtil &kwu)
997 {
998 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD;
999
1000 switch (Iterator().Peek()) {
1001 case LEX_CHAR_0:
1002 case LEX_CHAR_1:
1003 case LEX_CHAR_2:
1004 case LEX_CHAR_3:
1005 case LEX_CHAR_4:
1006 case LEX_CHAR_5:
1007 case LEX_CHAR_6:
1008 case LEX_CHAR_7:
1009 case LEX_CHAR_8:
1010 case LEX_CHAR_9: {
1011 ScanNumber((kwu.Flags() & NextTokenFlags::UNARY_MINUS) != std::underlying_type_t<NextTokenFlags>(0U));
1012 break;
1013 }
1014 case LEX_CHAR_QUESTION: {
1015 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_QUESTION;
1016 Iterator().Forward(1);
1017 break;
1018 }
1019 case LEX_CHAR_DOT: {
1020 Iterator().Forward(1);
1021
1022 if (Iterator().Peek() == LEX_CHAR_DOT) {
1023 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_PERIOD_PERIOD;
1024 Iterator().Forward(1);
1025 break;
1026 }
1027
1028 Iterator().Backward(1);
1029 break;
1030 }
1031 default: {
1032 break;
1033 }
1034 }
1035 }
1036
ScanAsteriskPunctuator()1037 void Lexer::ScanAsteriskPunctuator()
1038 {
1039 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY;
1040
1041 switch (Iterator().Peek()) {
1042 case LEX_CHAR_ASTERISK: {
1043 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION;
1044 Iterator().Forward(1);
1045
1046 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
1047 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION_EQUAL;
1048 Iterator().Forward(1);
1049 }
1050 break;
1051 }
1052 case LEX_CHAR_EQUALS: {
1053 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY_EQUAL;
1054 Iterator().Forward(1);
1055 break;
1056 }
1057 default: {
1058 break;
1059 }
1060 }
1061 }
1062
ScanPercentPunctuator()1063 void Lexer::ScanPercentPunctuator()
1064 {
1065 GetToken().type_ = TokenType::PUNCTUATOR_MOD;
1066
1067 switch (Iterator().Peek()) {
1068 case LEX_CHAR_EQUALS: {
1069 GetToken().type_ = TokenType::PUNCTUATOR_MOD_EQUAL;
1070 Iterator().Forward(1);
1071 break;
1072 }
1073 default: {
1074 break;
1075 }
1076 }
1077 }
1078
IsLineTerminatorOrEos() const1079 bool Lexer::IsLineTerminatorOrEos() const
1080 {
1081 switch (Iterator().PeekCp()) {
1082 case util::StringView::Iterator::INVALID_CP:
1083 case LEX_CHAR_LF:
1084 case LEX_CHAR_CR:
1085 case LEX_CHAR_LS:
1086 case LEX_CHAR_PS: {
1087 return true;
1088 }
1089 default: {
1090 break;
1091 }
1092 }
1093
1094 return false;
1095 }
1096
ScanRegExpPattern()1097 bool Lexer::ScanRegExpPattern()
1098 {
1099 bool isCharClass = false;
1100 size_t cpSize {};
1101
1102 while (true) {
1103 switch (Iterator().PeekCp(&cpSize)) {
1104 case util::StringView::Iterator::INVALID_CP:
1105 case LEX_CHAR_LF:
1106 case LEX_CHAR_CR:
1107 case LEX_CHAR_LS:
1108 case LEX_CHAR_PS: {
1109 LogError(diagnostic::UNTERMINATED_REGEX);
1110 return false;
1111 }
1112 case LEX_CHAR_SLASH: {
1113 if (!isCharClass) {
1114 return true;
1115 }
1116
1117 break;
1118 }
1119 case LEX_CHAR_LEFT_SQUARE: {
1120 isCharClass = true;
1121 break;
1122 }
1123 case LEX_CHAR_RIGHT_SQUARE: {
1124 isCharClass = false;
1125 break;
1126 }
1127 case LEX_CHAR_BACKSLASH: {
1128 Iterator().Forward(1);
1129
1130 if (IsLineTerminatorOrEos()) {
1131 continue;
1132 }
1133
1134 break;
1135 }
1136 default: {
1137 break;
1138 }
1139 }
1140
1141 Iterator().Forward(cpSize);
1142 }
1143
1144 return true;
1145 }
1146
ScanRegExpFlags()1147 RegExpFlags Lexer::ScanRegExpFlags()
1148 {
1149 RegExpFlags resultFlags = RegExpFlags::EMPTY;
1150
1151 while (true) {
1152 size_t cpSize {};
1153 auto cp = Iterator().PeekCp(&cpSize);
1154 if (!KeywordsUtil::IsIdentifierPart(cp)) {
1155 break;
1156 }
1157
1158 Iterator().Forward(cpSize);
1159
1160 RegExpFlags flag = RegExpFlags::EMPTY;
1161
1162 switch (cp) {
1163 case LEX_CHAR_LOWERCASE_G: {
1164 flag = RegExpFlags::GLOBAL;
1165 break;
1166 }
1167 case LEX_CHAR_LOWERCASE_I: {
1168 flag = RegExpFlags::IGNORE_CASE;
1169 break;
1170 }
1171 case LEX_CHAR_LOWERCASE_M: {
1172 flag = RegExpFlags::MULTILINE;
1173 break;
1174 }
1175 case LEX_CHAR_LOWERCASE_S: {
1176 flag = RegExpFlags::DOTALL;
1177 break;
1178 }
1179 case LEX_CHAR_LOWERCASE_U: {
1180 flag = RegExpFlags::UNICODE;
1181 break;
1182 }
1183 case LEX_CHAR_LOWERCASE_Y: {
1184 flag = RegExpFlags::STICKY;
1185 break;
1186 }
1187 case LEX_CHAR_SP: {
1188 return resultFlags;
1189 }
1190 default: {
1191 LogError(diagnostic::INVALID_REGEX_FLAG);
1192 return resultFlags;
1193 }
1194 }
1195
1196 if (flag == RegExpFlags::EMPTY || (resultFlags & flag) != 0) {
1197 LogError(diagnostic::INVALID_REGEX_FLAG);
1198 }
1199
1200 resultFlags = resultFlags | flag;
1201 }
1202
1203 return resultFlags;
1204 }
1205
CheckOctal()1206 void Lexer::CheckOctal()
1207 {
1208 switch (Iterator().Peek()) {
1209 case LEX_CHAR_8:
1210 case LEX_CHAR_9: {
1211 LogError(diagnostic::INVALID_OCTAL_DIGIT);
1212 break;
1213 }
1214 default: {
1215 break;
1216 }
1217 }
1218 }
1219
ScanRegExp()1220 RegExp Lexer::ScanRegExp()
1221 {
1222 // for proper handling such regexps as /=/
1223 if (GetToken().Type() == lexer::TokenType::PUNCTUATOR_DIVIDE_EQUAL) {
1224 Iterator().Backward(1);
1225 }
1226
1227 GetToken().type_ = TokenType::LITERAL_REGEXP;
1228 GetToken().keywordType_ = TokenType::LITERAL_REGEXP;
1229
1230 const auto patternStart = Iterator().Index();
1231 if (!ScanRegExpPattern()) {
1232 return {nullptr, nullptr, RegExpFlags::EMPTY};
1233 }
1234
1235 const auto pattern = SourceView(patternStart, Iterator().Index());
1236
1237 ES2PANDA_ASSERT(Iterator().Peek() == LEX_CHAR_SLASH);
1238 Iterator().Forward(1);
1239
1240 const auto flagsStart = Iterator().Index();
1241 RegExpFlags resultFlags = ScanRegExpFlags();
1242 const auto flags = SourceView(flagsStart, Iterator().Index());
1243
1244 SkipWhiteSpaces();
1245 SetTokenEnd();
1246
1247 return {pattern, flags, resultFlags};
1248 }
1249
CheckArrow()1250 bool Lexer::CheckArrow()
1251 {
1252 if (Iterator().Peek() != LEX_CHAR_EQUALS) {
1253 return false;
1254 }
1255 Iterator().Forward(1);
1256
1257 bool res = Iterator().Peek() == LEX_CHAR_GREATER_THAN;
1258 Iterator().Backward(1);
1259
1260 return res;
1261 }
1262
SetTokenStart()1263 void Lexer::SetTokenStart()
1264 {
1265 if (pos_.nextTokenLine_ != 0) {
1266 pos_.line_ += pos_.nextTokenLine_;
1267 pos_.nextTokenLine_ = 0;
1268 GetToken().flags_ = TokenFlags::NEW_LINE;
1269 } else {
1270 GetToken().flags_ = TokenFlags::NONE;
1271 }
1272
1273 pos_.token_.loc_.start = SourcePosition {Iterator().Index(), pos_.line_, parserContext_->GetProgram()};
1274 GetToken().keywordType_ = TokenType::EOS;
1275 }
1276
SetTokenEnd()1277 void Lexer::SetTokenEnd()
1278 {
1279 pos_.token_.loc_.end = SourcePosition {Iterator().Index(), pos_.line_, parserContext_->GetProgram()};
1280 }
1281
SkipWhiteSpacesHelperSlash(char32_t * cp)1282 bool Lexer::SkipWhiteSpacesHelperSlash(char32_t *cp)
1283 {
1284 Iterator().Forward(1);
1285 *cp = Iterator().Peek();
1286 if (*cp == LEX_CHAR_SLASH || *cp == LEX_CHAR_ASTERISK) {
1287 Iterator().Forward(1);
1288 *cp == LEX_CHAR_SLASH ? SkipSingleLineComment() : SkipMultiLineComment();
1289 return true;
1290 }
1291
1292 Iterator().Backward(1);
1293 return false;
1294 }
1295
IsEnableParseJsdoc() const1296 bool Lexer::IsEnableParseJsdoc() const
1297 {
1298 return parserContext_->IsEnableJsdocParse();
1299 }
1300
IsValidJsDocStart(char32_t * cp)1301 bool Lexer::IsValidJsDocStart(char32_t *cp)
1302 {
1303 if (!IsEnableParseJsdoc()) {
1304 return false;
1305 }
1306
1307 for (size_t idx = 0; idx < JS_DOC_START_SIZE; ++idx) {
1308 Iterator().Forward(1);
1309 *cp = Iterator().Peek();
1310 if (*cp != JS_DOC_START_LEX[idx]) {
1311 Iterator().Backward(idx + 1);
1312 return false;
1313 }
1314 }
1315
1316 Iterator().Backward(JS_DOC_START_SIZE);
1317 return true;
1318 }
1319
IsValidJsDocEnd(char32_t * cp)1320 bool Lexer::IsValidJsDocEnd(char32_t *cp)
1321 {
1322 for (size_t idx = 0; idx < JS_DOC_END_SIZE; ++idx) {
1323 Iterator().Forward(1);
1324 *cp = Iterator().Peek();
1325 if (*cp != JS_DOC_END_LEX[idx]) {
1326 Iterator().Backward(idx + 1);
1327 return false;
1328 }
1329 }
1330
1331 Iterator().Backward(JS_DOC_END_SIZE);
1332 return true;
1333 }
1334
SkipWhiteSpacesHelperDefault(const char32_t & cp)1335 bool Lexer::SkipWhiteSpacesHelperDefault(const char32_t &cp)
1336 {
1337 if (cp < LEX_ASCII_MAX_BITS) {
1338 return false;
1339 }
1340
1341 size_t cpSize {};
1342
1343 char32_t ch = Iterator().PeekCp(&cpSize);
1344 switch (ch) {
1345 case LEX_CHAR_LS:
1346 case LEX_CHAR_PS:
1347 pos_.nextTokenLine_++;
1348 [[fallthrough]];
1349 case LEX_CHAR_NBSP:
1350 case LEX_CHAR_ZWNBSP:
1351 case LEX_CHAR_OGHAM:
1352 case LEX_CHAR_NARROW_NO_BREAK_SP:
1353 case LEX_CHAR_MATHEMATICAL_SP:
1354 case LEX_CHAR_IDEOGRAPHIC_SP:
1355 Iterator().Forward(cpSize);
1356 return true;
1357 default:
1358 if (ch >= LEX_CHAR_ENQUAD && ch <= LEX_CHAR_ZERO_WIDTH_SP) {
1359 Iterator().Forward(cpSize);
1360 return true;
1361 } else {
1362 return false;
1363 }
1364 }
1365 }
1366
SkipWhiteSpaces()1367 void Lexer::SkipWhiteSpaces()
1368 {
1369 while (true) {
1370 auto cp = Iterator().Peek();
1371
1372 switch (cp) {
1373 case LEX_CHAR_CR:
1374 Iterator().Forward(1);
1375
1376 if (Iterator().Peek() != LEX_CHAR_LF) {
1377 Iterator().Backward(1);
1378 }
1379
1380 [[fallthrough]];
1381 case LEX_CHAR_LF:
1382 Iterator().Forward(1);
1383 pos_.nextTokenLine_++;
1384 continue;
1385 case LEX_CHAR_VT:
1386 case LEX_CHAR_FF:
1387 case LEX_CHAR_SP:
1388 case LEX_CHAR_TAB:
1389 case LEX_CHAR_NEXT_LINE:
1390 Iterator().Forward(1);
1391 continue;
1392 case LEX_CHAR_SLASH:
1393 if ((GetContext()->Status() & parser::ParserStatus::ALLOW_JS_DOC_START) != 0 &&
1394 IsValidJsDocStart(&cp)) {
1395 return;
1396 }
1397 if (!SkipWhiteSpacesHelperSlash(&cp)) {
1398 return;
1399 }
1400 continue;
1401 default:
1402 if (!SkipWhiteSpacesHelperDefault(cp)) {
1403 return;
1404 }
1405 continue;
1406 }
1407 }
1408 }
1409
ScanHashMark()1410 void Lexer::ScanHashMark()
1411 {
1412 GetToken().type_ = TokenType::PUNCTUATOR_HASH_MARK;
1413 }
1414
ScanBackTick()1415 void Lexer::ScanBackTick()
1416 {
1417 GetToken().type_ = TokenType::PUNCTUATOR_BACK_TICK;
1418 SetTokenEnd();
1419 }
1420
1421 // NOLINTNEXTLINE(google-default-arguments)
NextToken(NextTokenFlags flags)1422 void Lexer::NextToken(NextTokenFlags flags)
1423 {
1424 JSKeywords kws(this, flags);
1425 NextToken(&kws);
1426 }
1427
ScanColonPunctuator()1428 void Lexer::ScanColonPunctuator()
1429 {
1430 GetToken().type_ = TokenType::PUNCTUATOR_COLON;
1431 }
1432
ScanDollarPunctuator()1433 bool Lexer::ScanDollarPunctuator()
1434 {
1435 return false;
1436 }
1437
1438 // CC-OFFNXT(huge_method,huge_cyclomatic_complexity,G.FUN.01-CPP) big switch-case, solid logic
1439 // NOLINTNEXTLINE(readability-function-size)
NextToken(Keywords * kws)1440 void Lexer::NextToken(Keywords *kws)
1441 {
1442 KeywordsUtil &kwu = kws->Util();
1443
1444 SetTokenStart();
1445
1446 auto cp = Iterator().Peek();
1447 Iterator().Forward(1);
1448
1449 switch (cp) {
1450 case LEX_CHAR_EXCLAMATION: {
1451 ScanExclamationPunctuator();
1452 break;
1453 }
1454 case LEX_CHAR_SINGLE_QUOTE: {
1455 ScanString<LEX_CHAR_SINGLE_QUOTE>();
1456 break;
1457 }
1458 case LEX_CHAR_DOUBLE_QUOTE: {
1459 ScanString<LEX_CHAR_DOUBLE_QUOTE>();
1460 break;
1461 }
1462 case LEX_CHAR_HASH_MARK: {
1463 ScanHashMark();
1464 break;
1465 }
1466 case LEX_CHAR_PERCENT: {
1467 ScanPercentPunctuator();
1468 break;
1469 }
1470 case LEX_CHAR_AMPERSAND: {
1471 ScanAmpersandPunctuator();
1472 break;
1473 }
1474 case LEX_CHAR_LEFT_PAREN: {
1475 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_PARENTHESIS;
1476 break;
1477 }
1478 case LEX_CHAR_RIGHT_PAREN: {
1479 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_PARENTHESIS;
1480 break;
1481 }
1482 case LEX_CHAR_ASTERISK: {
1483 ScanAsteriskPunctuator();
1484 break;
1485 }
1486 case LEX_CHAR_PLUS: {
1487 ScanPlusPunctuator();
1488 break;
1489 }
1490 case LEX_CHAR_COMMA: {
1491 GetToken().type_ = TokenType::PUNCTUATOR_COMMA;
1492 break;
1493 }
1494 case LEX_CHAR_MINUS: {
1495 ScanMinusPunctuator();
1496 break;
1497 }
1498 case LEX_CHAR_DOT: {
1499 ScanDotPunctuator(kwu);
1500 break;
1501 }
1502 case LEX_CHAR_SLASH: {
1503 ScanSlashPunctuator();
1504 break;
1505 }
1506 case LEX_CHAR_0: {
1507 if (Iterator().Peek() != LEX_CHAR_DOT) {
1508 ScanNumberLeadingZero((kwu.Flags() & NextTokenFlags::UNARY_MINUS) !=
1509 std::underlying_type_t<NextTokenFlags>(0U));
1510 break;
1511 }
1512 [[fallthrough]];
1513 }
1514 case LEX_CHAR_1:
1515 case LEX_CHAR_2:
1516 case LEX_CHAR_3:
1517 case LEX_CHAR_4:
1518 case LEX_CHAR_5:
1519 case LEX_CHAR_6:
1520 case LEX_CHAR_7:
1521 case LEX_CHAR_8:
1522 case LEX_CHAR_9: {
1523 ScanNumber((kwu.Flags() & NextTokenFlags::UNARY_MINUS) != std::underlying_type_t<NextTokenFlags>(0U));
1524 break;
1525 }
1526 case LEX_CHAR_COLON: {
1527 ScanColonPunctuator();
1528 break;
1529 }
1530 case LEX_CHAR_SEMICOLON: {
1531 GetToken().type_ = TokenType::PUNCTUATOR_SEMI_COLON;
1532 break;
1533 }
1534 case LEX_CHAR_LESS_THAN: {
1535 ScanLessThanPunctuator();
1536 break;
1537 }
1538 case LEX_CHAR_EQUALS: {
1539 ScanEqualsPunctuator();
1540 break;
1541 }
1542 case LEX_CHAR_GREATER_THAN: {
1543 ScanGreaterThanPunctuator();
1544 break;
1545 }
1546 case LEX_CHAR_QUESTION: {
1547 ScanQuestionPunctuator();
1548 break;
1549 }
1550 case LEX_CHAR_AT: {
1551 ScanAtPunctuator();
1552 break;
1553 }
1554 case LEX_CHAR_DOLLAR_SIGN:
1555 case LEX_CHAR_UPPERCASE_A:
1556 case LEX_CHAR_UPPERCASE_E:
1557 case LEX_CHAR_UPPERCASE_G:
1558 case LEX_CHAR_UPPERCASE_H:
1559 case LEX_CHAR_UPPERCASE_J:
1560 case LEX_CHAR_UPPERCASE_K:
1561 case LEX_CHAR_UPPERCASE_M:
1562 case LEX_CHAR_UPPERCASE_P:
1563 case LEX_CHAR_UPPERCASE_Q:
1564 case LEX_CHAR_UPPERCASE_R:
1565 case LEX_CHAR_UPPERCASE_T:
1566 case LEX_CHAR_UPPERCASE_U:
1567 case LEX_CHAR_UPPERCASE_V:
1568 case LEX_CHAR_UPPERCASE_W:
1569 case LEX_CHAR_UPPERCASE_X:
1570 case LEX_CHAR_UPPERCASE_Y:
1571 case LEX_CHAR_UPPERCASE_Z:
1572 case LEX_CHAR_UNDERSCORE: {
1573 kwu.ScanIdContinue();
1574 break;
1575 }
1576 case LEX_CHAR_LEFT_SQUARE: {
1577 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SQUARE_BRACKET;
1578 break;
1579 }
1580 case LEX_CHAR_BACKSLASH: {
1581 GetToken().flags_ |= TokenFlags::HAS_ESCAPE;
1582
1583 if (Iterator().Peek() != LEX_CHAR_LOWERCASE_U) {
1584 LogError(diagnostic::INVALID_CHAR);
1585 break;
1586 }
1587
1588 cp = ScanUnicodeEscapeSequence();
1589 kwu.ScanIdentifierStart(kws, cp);
1590 break;
1591 }
1592 case LEX_CHAR_RIGHT_SQUARE: {
1593 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SQUARE_BRACKET;
1594 break;
1595 }
1596 case LEX_CHAR_CIRCUMFLEX: {
1597 ScanCircumflexPunctuator();
1598 break;
1599 }
1600 case LEX_CHAR_BACK_TICK: {
1601 ScanBackTick();
1602 return;
1603 }
1604 case LEX_CHAR_LOWERCASE_C: {
1605 if (ScanCharLiteral()) {
1606 break;
1607 }
1608 }
1609 [[fallthrough]];
1610 case LEX_CHAR_LOWERCASE_A:
1611 case LEX_CHAR_LOWERCASE_B:
1612 case LEX_CHAR_LOWERCASE_D:
1613 case LEX_CHAR_LOWERCASE_E:
1614 case LEX_CHAR_LOWERCASE_F:
1615 case LEX_CHAR_LOWERCASE_G:
1616 case LEX_CHAR_LOWERCASE_H:
1617 case LEX_CHAR_LOWERCASE_I:
1618 case LEX_CHAR_LOWERCASE_J:
1619 case LEX_CHAR_LOWERCASE_K:
1620 case LEX_CHAR_LOWERCASE_L:
1621 case LEX_CHAR_LOWERCASE_M:
1622 case LEX_CHAR_LOWERCASE_N:
1623 case LEX_CHAR_LOWERCASE_O:
1624 case LEX_CHAR_LOWERCASE_P:
1625 case LEX_CHAR_LOWERCASE_Q:
1626 case LEX_CHAR_LOWERCASE_R:
1627 case LEX_CHAR_LOWERCASE_S:
1628 case LEX_CHAR_LOWERCASE_T:
1629 case LEX_CHAR_LOWERCASE_U:
1630 case LEX_CHAR_LOWERCASE_V:
1631 case LEX_CHAR_LOWERCASE_W:
1632 case LEX_CHAR_LOWERCASE_X:
1633 case LEX_CHAR_LOWERCASE_Y:
1634 case LEX_CHAR_LOWERCASE_Z:
1635 case LEX_CHAR_UPPERCASE_B:
1636 case LEX_CHAR_UPPERCASE_C:
1637 case LEX_CHAR_UPPERCASE_D:
1638 case LEX_CHAR_UPPERCASE_F:
1639 case LEX_CHAR_UPPERCASE_I:
1640 case LEX_CHAR_UPPERCASE_L:
1641 case LEX_CHAR_UPPERCASE_N:
1642 case LEX_CHAR_UPPERCASE_O:
1643 case LEX_CHAR_UPPERCASE_S: {
1644 kws->ScanKeyword(cp);
1645 break;
1646 }
1647 case LEX_CHAR_LEFT_BRACE: {
1648 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_BRACE;
1649
1650 if (tlCtx_ != nullptr) {
1651 tlCtx_->ConsumeLeftBrace();
1652 }
1653
1654 break;
1655 }
1656 case LEX_CHAR_VLINE: {
1657 ScanVLinePunctuator();
1658 break;
1659 }
1660 case LEX_CHAR_RIGHT_BRACE: {
1661 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_BRACE;
1662
1663 if (tlCtx_ != nullptr && tlCtx_->ConsumeRightBrace()) {
1664 SetTokenEnd();
1665 return;
1666 }
1667
1668 break;
1669 }
1670 case LEX_CHAR_TILDE: {
1671 GetToken().type_ = TokenType::PUNCTUATOR_TILDE;
1672 break;
1673 }
1674 default: {
1675 Iterator().Backward(1);
1676 if (cp == util::StringView::Iterator::INVALID_CP) {
1677 GetToken().type_ = TokenType::EOS;
1678 break;
1679 }
1680
1681 cp = Iterator().Next();
1682 kwu.ScanIdentifierStart(kws, cp);
1683 break;
1684 }
1685 }
1686
1687 SetTokenEnd();
1688 SkipWhiteSpaces();
1689 }
1690
ScanNumberLeadingZeroImplNonAllowedCases()1691 void Lexer::ScanNumberLeadingZeroImplNonAllowedCases()
1692 {
1693 switch (Iterator().Peek()) {
1694 case LEX_CHAR_0:
1695 case LEX_CHAR_1:
1696 case LEX_CHAR_2:
1697 case LEX_CHAR_3:
1698 case LEX_CHAR_4:
1699 case LEX_CHAR_5:
1700 case LEX_CHAR_6:
1701 case LEX_CHAR_7: {
1702 LogError(diagnostic::IMPLICIT_OCTAL_NOT_ALLOWED);
1703 break;
1704 }
1705 case LEX_CHAR_8:
1706 case LEX_CHAR_9: {
1707 LogError(diagnostic::NON_OCTAL_DECIAML_INTEGER_LIT_IN_STRICT_MODE);
1708 break;
1709 }
1710 case LEX_CHAR_UNDERSCORE: {
1711 LogError(diagnostic::NUMERIC_SEP_UNDERSCORE_IN_NUMBER);
1712 break;
1713 }
1714 default: {
1715 break;
1716 }
1717 }
1718 }
1719
HandleNewlineHelper(util::UString * str,size_t * escapeEnd)1720 void Lexer::HandleNewlineHelper(util::UString *str, size_t *escapeEnd)
1721 {
1722 GetToken().flags_ |= TokenFlags::HAS_ESCAPE;
1723 str->Append(SourceView(*escapeEnd, Iterator().Index()));
1724
1725 if (Iterator().Peek() == LEX_CHAR_CR) {
1726 Iterator().Forward(1);
1727 if (Iterator().Peek() != LEX_CHAR_LF) {
1728 Iterator().Backward(1);
1729 }
1730 }
1731
1732 pos_.line_++;
1733 str->Append(LEX_CHAR_LF);
1734 Iterator().Forward(1);
1735 *escapeEnd = Iterator().Index();
1736 }
1737
HandleBackslashHelper(util::UString * str,size_t * escapeEnd)1738 bool Lexer::HandleBackslashHelper(util::UString *str, size_t *escapeEnd)
1739 {
1740 GetToken().flags_ |= TokenFlags::HAS_ESCAPE;
1741 str->Append(SourceView(*escapeEnd, Iterator().Index()));
1742 Iterator().Forward(1);
1743 bool scanned = ScanStringUnicodePart(str);
1744 *escapeEnd = Iterator().Index();
1745 return scanned;
1746 }
1747
HandleDollarSignHelper(const char32_t & end)1748 bool Lexer::HandleDollarSignHelper(const char32_t &end)
1749 {
1750 Iterator().Forward(1);
1751 if (end == LEX_CHAR_BACK_TICK) {
1752 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
1753 Iterator().Backward(1);
1754 return true;
1755 }
1756 }
1757 return false;
1758 }
1759
HandleDoubleQuoteHelper(const char32_t & end,const char32_t & cp)1760 bool Lexer::HandleDoubleQuoteHelper(const char32_t &end, const char32_t &cp)
1761 {
1762 if (end == cp) {
1763 return false;
1764 }
1765 Iterator().Forward(1);
1766 return true;
1767 }
1768
PrepareStringTokenHelper()1769 void Lexer::PrepareStringTokenHelper()
1770 {
1771 GetToken().type_ = TokenType::LITERAL_STRING;
1772 GetToken().keywordType_ = TokenType::LITERAL_STRING;
1773 }
1774
FinalizeTokenHelper(util::UString * str,const size_t & startPos,size_t escapeEnd,bool finalize)1775 void Lexer::FinalizeTokenHelper(util::UString *str, const size_t &startPos, size_t escapeEnd, bool finalize)
1776 {
1777 if (!finalize) {
1778 return;
1779 }
1780
1781 if ((GetToken().flags_ & TokenFlags::HAS_ESCAPE) != 0U) {
1782 str->Append(SourceView(escapeEnd, Iterator().Index()));
1783 GetToken().src_ = str->View();
1784 } else {
1785 GetToken().src_ = SourceView(startPos, Iterator().Index());
1786 }
1787 }
1788
FinalizeJsDocInfoHelper(util::UString * str,const size_t & startPos,size_t escapeEnd)1789 void Lexer::FinalizeJsDocInfoHelper(util::UString *str, const size_t &startPos, size_t escapeEnd)
1790 {
1791 if ((GetToken().flags_ & TokenFlags::HAS_ESCAPE) != 0U) {
1792 str->Append(SourceView(escapeEnd, Iterator().Index()));
1793 } else {
1794 str->Append(SourceView(startPos, Iterator().Index()));
1795 }
1796 }
1797
Pos()1798 LexerPosition &Lexer::Pos()
1799 {
1800 return pos_;
1801 }
1802
Pos() const1803 const LexerPosition &Lexer::Pos() const
1804 {
1805 return pos_;
1806 }
1807 } // namespace ark::es2panda::lexer
1808