1 /**
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "lexer.h"
17
18 #include <gen/keywords.h>
19 #include <parser/context/parserContext.h>
20
21 namespace panda::es2panda::lexer {
22
LexerPosition(const util::StringView & source)23 LexerPosition::LexerPosition(const util::StringView &source) : iterator(source) {}
24
// Builds a lexer over the source code of the program owned by parserContext.
// The allocator and the source view are both taken from parserContext->GetProgram(), and the
// lexer position is initialised from that source view.
Lexer::Lexer(const parser::ParserContext *parserContext)
    : allocator_(parserContext->GetProgram()->Allocator()),
      parserContext_(parserContext),
      source_(parserContext->GetProgram()->SourceCode()),
      pos_(source_)
{
    // Skip leading whitespace and comments right away.
    SkipWhiteSpaces();
}
33
ScanUnicodeEscapeSequence()34 char32_t Lexer::ScanUnicodeEscapeSequence()
35 {
36 ASSERT(Iterator().Peek() == LEX_CHAR_LOWERCASE_U);
37
38 Iterator().Forward(1);
39
40 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
41 Iterator().Forward(1);
42 return ScanUnicodeCodePointEscape();
43 }
44
45 // 4: a template parameter to the expected fixed length when scanning Unicode escape sequences
46 return ScanHexEscape<4>();
47 }
48
ScanUnicodeCodePointEscape()49 char32_t Lexer::ScanUnicodeCodePointEscape()
50 {
51 double code = 0;
52 char32_t cp {};
53
54 while (true) {
55 cp = Iterator().Peek();
56 if (!IsHexDigit(cp)) {
57 break;
58 }
59
60 Iterator().Forward(1);
61
62 constexpr auto multiplier = 16;
63 code = code * multiplier + HexValue(cp);
64 if (code > UNICODE_CODE_POINT_MAX) {
65 if (CheckTokenIsTaggedTemplate()) {
66 AssignTokenEscapeError();
67 break;
68 }
69 ThrowError("Invalid unicode escape sequence");
70 }
71 }
72 if (cp != LEX_CHAR_RIGHT_BRACE) {
73 if (CheckTokenIsTaggedTemplate()) {
74 AssignTokenEscapeError();
75 return static_cast<char32_t>(code);
76 } else {
77 ThrowError("Invalid unicode escape sequence");
78 }
79 }
80
81 Iterator().Forward(1);
82 return static_cast<char32_t>(code);
83 }
84
Allocator()85 ArenaAllocator *Lexer::Allocator()
86 {
87 return allocator_;
88 }
89
GetToken()90 Token &Lexer::GetToken()
91 {
92 return pos_.token;
93 }
94
GetToken() const95 const Token &Lexer::GetToken() const
96 {
97 return pos_.token;
98 }
99
Line() const100 size_t Lexer::Line() const
101 {
102 return pos_.line;
103 }
104
Save() const105 LexerPosition Lexer::Save() const
106 {
107 return pos_;
108 }
109
BackwardToken(TokenType type,size_t offset)110 void Lexer::BackwardToken(TokenType type, size_t offset)
111 {
112 pos_.token.type_ = type;
113 pos_.iterator.Reset(GetToken().End().index - offset);
114 pos_.nextTokenLine = 0;
115 }
116
ForwardToken(TokenType type,size_t offset)117 void Lexer::ForwardToken(TokenType type, size_t offset)
118 {
119 SetTokenStart();
120 pos_.token.type_ = type;
121 pos_.iterator.Forward(offset);
122 SetTokenEnd();
123 SkipWhiteSpaces();
124 }
125
Rewind(const LexerPosition & pos)126 void Lexer::Rewind(const LexerPosition &pos)
127 {
128 pos_ = pos;
129 }
130
Lookahead()131 char32_t Lexer::Lookahead()
132 {
133 return Iterator().Peek();
134 }
135
// Returns the slice of the source between two iterators by delegating to the index-based overload.
util::StringView Lexer::SourceView(const util::StringView::Iterator &begin, const util::StringView::Iterator &end) const
{
    const auto from = begin.Index();
    const auto to = end.Index();
    return SourceView(from, to);
}
140
SourceView(size_t begin,size_t end) const141 util::StringView Lexer::SourceView(size_t begin, size_t end) const
142 {
143 return source_.Substr(begin, end);
144 }
145
// Consumes the rest of a multi-line comment and returns just past the closing "*/".
// SkipWhiteSpaces() calls this after it has consumed the opening "/*".
// Each line terminator met on the way increments pos_.nextTokenLine; reading INVALID_CP raises
// a syntax error through ThrowError().
void Lexer::SkipMultiLineComment()
{
    while (true) {
        switch (Iterator().Next()) {
            case util::StringView::Iterator::INVALID_CP: {
                ThrowError("Unterminated multi-line comment");
                break;
            }
            case LEX_CHAR_CR: {
                // Treat CR LF as a single line break.
                if (Iterator().Peek() == LEX_CHAR_LF) {
                    Iterator().Forward(1);
                }

                [[fallthrough]];
            }
            case LEX_CHAR_LF:
            case LEX_CHAR_LS:
            case LEX_CHAR_PS: {
                pos_.nextTokenLine++;
                continue;
            }
            case LEX_CHAR_ASTERISK: {
                // "*/" closes the comment; a lone '*' is ordinary comment content.
                if (Iterator().Peek() == LEX_CHAR_SLASH) {
                    Iterator().Forward(1);
                    return;
                }

                break;
            }
            default: {
                break;
            }
        }
    }
}
181
182 /* New line character is not processed */
// Consumes a single-line comment up to and including its line terminator (CR LF counts as one).
// pos_.nextTokenLine is incremented when the comment ends at a line terminator or at the end
// of the input, but not when it ends at an INVALID_CP code point (see the note in that case).
void Lexer::SkipSingleLineComment()
{
    while (true) {
        // INVALID_CP may appear in the middle of a comment, so it cannot be used to detect
        // the end of the comment; the iterator running out of input is checked instead.
        if (!Iterator().HasNext()) {
            Iterator().Next();
            pos_.nextTokenLine++;
            return;
        }
        switch (Iterator().Next()) {
            case util::StringView::Iterator::INVALID_CP: {
                // Known issue: returning here lets an INVALID_CP inside the comment terminate it.
                // INVALID_CP should not terminate a single-line comment; only the end of the
                // iterator should do so (and increment pos_.nextTokenLine).
                // Removing this return is an incompatible bug fix and is left for a separate issue.
                return;
            }
            case LEX_CHAR_CR: {
                // Treat CR LF as a single line break.
                if (Iterator().Peek() == LEX_CHAR_LF) {
                    Iterator().Forward(1);
                }

                [[fallthrough]];
            }
            case LEX_CHAR_LF:
            case LEX_CHAR_LS:
            case LEX_CHAR_PS: {
                pos_.nextTokenLine++;
                return;
            }
            default: {
                break;
            }
        }
    }
}
221
ThrowError(std::string_view message)222 void Lexer::ThrowError(std::string_view message)
223 {
224 lexer::LineIndex lineIndex = parserContext_->GetProgram()->GetLineIndex();
225 SourceLocation loc = lineIndex.GetLocation(SourcePosition(Iterator().Index(), pos_.line + pos_.nextTokenLine));
226 throw es2panda::Error(es2panda::ErrorType::SYNTAX, message, loc.line, loc.col);
227 }
228
CheckNumberLiteralEnd()229 void Lexer::CheckNumberLiteralEnd()
230 {
231 if (Iterator().Peek() == LEX_CHAR_LOWERCASE_N) {
232 Iterator().Forward(1);
233 GetToken().flags_ |= TokenFlags::NUMBER_BIGINT;
234 }
235
236 GetToken().src_ = SourceView(GetToken().Start().index, Iterator().Index());
237 const auto nextCp = Iterator().PeekCp();
238 if (KeywordsUtil::IsIdentifierStart(nextCp) || IsDecimalDigit(nextCp)) {
239 ThrowError("Invalid numeric literal");
240 }
241 }
242
// Scans a numeric literal whose first digit is '0'; the switch below inspects the character at
// the iterator (presumably the one right after that '0' — confirm against the caller).
// Prefixes x/X, b/B and o/O select a hex, binary or octal literal; a following decimal digit is
// rejected; anything else is scanned as a decimal literal by ScanNumber().
void Lexer::ScanNumberLeadingZero()
{
    GetToken().type_ = TokenType::LITERAL_NUMBER;

    switch (Iterator().Peek()) {
        case LEX_CHAR_LOWERCASE_X:
        case LEX_CHAR_UPPERCASE_X: {
            Iterator().Forward(1);
            constexpr auto RADIX = 16;
            ScanNumberRadix<IsHexDigit, RADIX>();
            CheckNumberLiteralEnd();
            return;
        }
        case LEX_CHAR_LOWERCASE_B:
        case LEX_CHAR_UPPERCASE_B: {
            Iterator().Forward(1);
            constexpr auto RADIX = 2;
            ScanNumberRadix<IsBinaryDigit, RADIX>();
            CheckNumberLiteralEnd();
            return;
        }
        case LEX_CHAR_LOWERCASE_O:
        case LEX_CHAR_UPPERCASE_O: {
            Iterator().Forward(1);
            constexpr auto RADIX = 8;
            ScanNumberRadix<IsOctalDigit, RADIX>();

            // An 8 or 9 right after the octal digits gets a dedicated error message.
            switch (Iterator().Peek()) {
                case LEX_CHAR_8:
                case LEX_CHAR_9: {
                    ThrowError("Invalid octal digit");
                    break;
                }
                default: {
                    break;
                }
            }

            CheckNumberLiteralEnd();
            return;
        }
        case LEX_CHAR_0:
        case LEX_CHAR_1:
        case LEX_CHAR_2:
        case LEX_CHAR_3:
        case LEX_CHAR_4:
        case LEX_CHAR_5:
        case LEX_CHAR_6:
        case LEX_CHAR_7: {
            ThrowError("Implicit octal literal not allowed");
            break;
        }
        case LEX_CHAR_8:
        case LEX_CHAR_9: {
            ThrowError("NonOctalDecimalIntegerLiteral is not enabled in strict mode code");
            break;
        }
        default: {
            break;
        }
    }

    // ThrowError() throws, so a following '0' never reaches this point and the argument
    // (allowNumericSeparator) evaluates to false here.
    ScanNumber(Iterator().Peek() == LEX_CHAR_0);
}
307
// Consumes a run of decimal digits, optionally interleaved with '_' numeric separators.
// A separator is rejected when separators are not allowed, when it directly follows a '.',
// when it directly follows another separator, or when it ends the run.
void Lexer::ScanDecimalNumbers(bool allowNumericSeparator)
{
    // True while a separator may legally appear next; cleared right after one was consumed.
    bool allowNumericOnNext = true;

    while (true) {
        switch (Iterator().Peek()) {
            case LEX_CHAR_0:
            case LEX_CHAR_1:
            case LEX_CHAR_2:
            case LEX_CHAR_3:
            case LEX_CHAR_4:
            case LEX_CHAR_5:
            case LEX_CHAR_6:
            case LEX_CHAR_7:
            case LEX_CHAR_8:
            case LEX_CHAR_9: {
                Iterator().Forward(1);
                allowNumericOnNext = true;
                break;
            }
            case LEX_CHAR_UNDERSCORE: {
                // Step back one position to inspect the character preceding the underscore.
                Iterator().Backward(1);
                isUnderscore_ = true;

                if (Iterator().Peek() == LEX_CHAR_DOT || !allowNumericSeparator || !allowNumericOnNext) {
                    // Move back onto the underscore so the error is reported at its position.
                    Iterator().Forward(1);
                    ThrowError("Invalid numeric separator");
                }

                GetToken().flags_ |= TokenFlags::NUMBER_HAS_UNDERSCORE;
                // 2: skip the preceding character again plus the underscore itself.
                Iterator().Forward(2);
                allowNumericOnNext = false;
                break;
            }
            default: {
                // The run ended; it must not end on a separator.
                if (!allowNumericOnNext) {
                    ThrowError("Numeric separators are not allowed at the end of numeric literals");
                }
                return;
            }
        }
    }
}
351
ConvertNumber(size_t exponentSignPos)352 void Lexer::ConvertNumber(size_t exponentSignPos)
353 {
354 util::StringView sv = SourceView(GetToken().Start().index, Iterator().Index());
355 std::string utf8 = std::string {sv.Utf8()};
356 bool needConversion = false;
357
358 if (exponentSignPos != std::numeric_limits<size_t>::max()) {
359 utf8.insert(exponentSignPos, 1, '+');
360 needConversion = true;
361 }
362
363 if (GetToken().flags_ & TokenFlags::NUMBER_HAS_UNDERSCORE) {
364 utf8.erase(std::remove(utf8.begin(), utf8.end(), LEX_CHAR_UNDERSCORE), utf8.end());
365 needConversion = true;
366 }
367
368 if (needConversion) {
369 util::UString converted(utf8, Allocator());
370 GetToken().src_ = converted.View();
371 } else {
372 GetToken().src_ = sv;
373 }
374
375 errno = 0;
376 char *endptr = nullptr;
377 double value = std::strtod(utf8.c_str(), &endptr);
378
379 if (endptr == utf8.c_str()) {
380 ThrowError("Invalid number");
381 return;
382 }
383
384 /*
385 * Extreme value handling:
386 * If the number exceeds Number.MAX_VALUE (~1.7976931348623157e+308), treat it as Infinity / -Infinity
387 * If the number is smaller than Number.MIN_VALUE (5e-324), it may be rounded to 0.0 or Number.MIN_VALUE,
388 * depending on platform behavior.
389 */
390 if (errno == ERANGE && std::abs(value) > std::numeric_limits<double>::max()) {
391 value = (value > 0.0 ? std::numeric_limits<double>::infinity()
392 : -std::numeric_limits<double>::infinity());
393 }
394
395 GetToken().number_ = value;
396 }
ScanNumber(bool allowNumericSeparator,bool allowBigInt)397 void Lexer::ScanNumber(bool allowNumericSeparator, bool allowBigInt)
398 {
399 GetToken().type_ = TokenType::LITERAL_NUMBER;
400
401 ScanDecimalNumbers(allowNumericSeparator);
402
403 size_t exponentSignPos = std::numeric_limits<size_t>::max();
404 bool parseExponent = true;
405
406 if (Iterator().Peek() == LEX_CHAR_DOT) {
407 allowBigInt = false;
408 Iterator().Forward(1);
409
410 auto cp = Iterator().Peek();
411 if (IsDecimalDigit(cp) || cp == LEX_CHAR_LOWERCASE_E || LEX_CHAR_UPPERCASE_E) {
412 ScanDecimalNumbers(allowNumericSeparator);
413 } else {
414 parseExponent = false;
415 }
416 }
417
418 switch (Iterator().Peek()) {
419 case LEX_CHAR_LOWERCASE_E:
420 case LEX_CHAR_UPPERCASE_E: {
421 allowBigInt = false;
422
423 if (!parseExponent) {
424 break;
425 }
426
427 Iterator().Forward(1);
428
429 switch (Iterator().Peek()) {
430 case LEX_CHAR_UNDERSCORE: {
431 break;
432 }
433 case LEX_CHAR_PLUS:
434 case LEX_CHAR_MINUS: {
435 Iterator().Forward(1);
436 break;
437 }
438 default: {
439 exponentSignPos = Iterator().Index() - GetToken().Start().index;
440 break;
441 }
442 }
443
444 if (!IsDecimalDigit(Iterator().Peek())) {
445 ThrowError("Invalid numeric literal");
446 }
447 ScanDecimalNumbers(allowNumericSeparator);
448 break;
449 }
450 default: {
451 break;
452 }
453 }
454
455 CheckNumberLiteralEnd();
456
457 if (GetToken().flags_ & TokenFlags::NUMBER_BIGINT) {
458 if (!allowBigInt) {
459 ThrowError("Invalid BigInt number");
460 }
461 if (isUnderscore_) {
462 ConvertNumber(exponentSignPos);
463 isUnderscore_ = false;
464 }
465
466 return;
467 }
468
469 ConvertNumber(exponentSignPos);
470 }
471
PushTemplateContext(TemplateLiteralParserContext * ctx)472 void Lexer::PushTemplateContext(TemplateLiteralParserContext *ctx)
473 {
474 tlCtx_ = ctx;
475 }
476
ScanTemplateStringEnd()477 void Lexer::ScanTemplateStringEnd()
478 {
479 ASSERT(Iterator().Peek() == LEX_CHAR_BACK_TICK);
480 Iterator().Forward(1);
481 SetTokenEnd();
482 SkipWhiteSpaces();
483 }
484
// Scans one chunk of a template literal, collecting its characters into the returned
// LexerTemplateString. Scanning stops either at the closing back-tick (which is left unconsumed)
// or right after a "${" (then scanExpression is set and following whitespace is skipped).
// `end` records the iterator index of the back-tick or of the '$'.
// Reading INVALID_CP raises a syntax error.
LexerTemplateString Lexer::ScanTemplateString()
{
    LexerTemplateString templateStr(Allocator());
    size_t cpSize = 0;

    while (true) {
        char32_t cp = Iterator().PeekCp(&cpSize);

        switch (cp) {
            case util::StringView::Iterator::INVALID_CP: {
                ThrowError("Unexpected token, expected '${' or '`'");
                break;
            }
            case LEX_CHAR_BACK_TICK: {
                templateStr.end = Iterator().Index();
                return templateStr;
            }
            case LEX_CHAR_CR: {
                // For CR LF stay on the LF so the shared code below consumes it;
                // for a lone CR step back so that code consumes the CR itself.
                Iterator().Forward(1);

                if (Iterator().Peek() != LEX_CHAR_LF) {
                    Iterator().Backward(1);
                }

                [[fallthrough]];
            }
            case LEX_CHAR_LF: {
                // Every line break is appended as a single LF.
                pos_.line++;
                templateStr.str.Append(LEX_CHAR_LF);
                Iterator().Forward(1);
                continue;
            }
            case LEX_CHAR_BACKSLASH: {
                Iterator().Forward(1);

                // A backslash followed by '`', '\' or '$' is copied verbatim as a pair.
                char32_t nextCp = Iterator().Peek();
                if (nextCp == LEX_CHAR_BACK_TICK || nextCp == LEX_CHAR_BACKSLASH || nextCp == LEX_CHAR_DOLLAR_SIGN) {
                    templateStr.str.Append(cp);
                    templateStr.str.Append(nextCp);
                    Iterator().Forward(1);
                    continue;
                }

                // Any other backslash is appended like an ordinary character below.
                Iterator().Backward(1);
                break;
            }
            case LEX_CHAR_DOLLAR_SIGN: {
                templateStr.end = Iterator().Index();
                Iterator().Forward(1);

                if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
                    Iterator().Forward(1);
                    templateStr.scanExpression = true;
                    SkipWhiteSpaces();
                    return templateStr;
                }

                // A '$' without '{' is plain text; it has already been consumed above.
                templateStr.str.Append(cp);
                continue;
            }
            default: {
                break;
            }
        }

        templateStr.str.Append(cp);
        Iterator().Forward(cpSize);
    }

    UNREACHABLE();
    return templateStr;
}
557
ResetTokenEnd()558 void Lexer::ResetTokenEnd()
559 {
560 SetTokenStart();
561 pos_.iterator.Reset(GetToken().End().index);
562 pos_.line = GetToken().End().line;
563 pos_.nextTokenLine = 0;
564 }
565
// Decodes the code point at the iterator as the body of an escape sequence and appends the
// resulting character to `str` (presumably the caller has already consumed the backslash —
// confirm at the call sites).
// A line terminator is consumed without appending anything; \x and \u escapes are delegated to
// the hex/unicode scanners. Octal-like and decimal-digit escapes raise a syntax error unless
// CheckTokenIsTaggedTemplate() holds, in which case the error is recorded on the token instead.
void Lexer::ScanStringUnicodePart(util::UString *str)
{
    size_t cpSize {};
    char32_t cp = Iterator().PeekCp(&cpSize);

    switch (cp) {
        case util::StringView::Iterator::INVALID_CP: {
            ThrowError("Unterminated string");
            break;
        }
        case LEX_CHAR_CR: {
            // For CR LF advance onto the LF; the shared code below then consumes it.
            Iterator().Forward(1);
            if (Iterator().Peek() != LEX_CHAR_LF) {
                Iterator().Backward(1);
            }

            [[fallthrough]];
        }
        case LEX_CHAR_LS:
        case LEX_CHAR_PS:
        case LEX_CHAR_LF: {
            // The line terminator is consumed and nothing is appended to the string.
            pos_.line++;
            Iterator().Forward(cpSize);
            return;
        }
        case LEX_CHAR_LOWERCASE_B: {
            cp = LEX_CHAR_BS;
            break;
        }
        case LEX_CHAR_LOWERCASE_T: {
            cp = LEX_CHAR_TAB;
            break;
        }
        case LEX_CHAR_LOWERCASE_N: {
            cp = LEX_CHAR_LF;
            break;
        }
        case LEX_CHAR_LOWERCASE_V: {
            cp = LEX_CHAR_VT;
            break;
        }
        case LEX_CHAR_LOWERCASE_F: {
            cp = LEX_CHAR_FF;
            break;
        }
        case LEX_CHAR_LOWERCASE_R: {
            cp = LEX_CHAR_CR;
            break;
        }
        case LEX_CHAR_LOWERCASE_X: {
            Iterator().Forward(1);
            // 2: a template parameter to the expected fixed length when scanning Unicode escape sequences
            str->Append(ScanHexEscape<2>());
            return;
        }
        case LEX_CHAR_LOWERCASE_U: {
            cp = ScanUnicodeEscapeSequence();
            str->Append(cp);
            return;
        }
        case LEX_CHAR_0: {
            // Look at the character after the '0', then restore the position.
            Iterator().Forward(1);
            bool isDecimal = IsDecimalDigit(Iterator().Peek());
            bool isOctal = IsOctalDigit(Iterator().Peek());
            Iterator().Backward(1);

            // '0' not followed by a decimal digit yields the NUL character.
            if (!isDecimal) {
                cp = LEX_CHAR_NULL;
                break;
            }

            if (isOctal) {
                if (CheckTokenIsTaggedTemplate()) {
                    AssignTokenEscapeError();
                    break;
                }
                ThrowError("Octal escape sequences are not allowed in strict mode");
            }

            [[fallthrough]];
        }
        default: {
            // The iterator still points at `cp`, so this tests whether `cp` itself is a decimal digit.
            if (IsDecimalDigit(Iterator().Peek())) {
                if (CheckTokenIsTaggedTemplate()) {
                    AssignTokenEscapeError();
                    break;
                }
                ThrowError("Invalid character escape sequence in strict mode");
            }

            break;
        }
    }

    // Consume the escape character and append the decoded (or unchanged) code point.
    Iterator().Forward(cpSize);
    str->Append(cp);
}
663
ScanQuestionPunctuator()664 void Lexer::ScanQuestionPunctuator()
665 {
666 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_MARK;
667
668 switch (Iterator().Peek()) {
669 case LEX_CHAR_QUESTION: {
670 GetToken().type_ = TokenType::PUNCTUATOR_NULLISH_COALESCING;
671 Iterator().Forward(1);
672
673 switch (Iterator().Peek()) {
674 case LEX_CHAR_EQUALS: {
675 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_NULLISH_EQUAL;
676 Iterator().Forward(1);
677 break;
678 }
679 default: {
680 break;
681 }
682 }
683
684 break;
685 }
686 case LEX_CHAR_DOT: {
687 Iterator().Forward(1);
688
689 if (!IsDecimalDigit(Iterator().Peek())) {
690 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_DOT;
691 return;
692 }
693
694 Iterator().Backward(1);
695 break;
696 }
697 default: {
698 break;
699 }
700 }
701 }
702
ScanLessThanPunctuator()703 void Lexer::ScanLessThanPunctuator()
704 {
705 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN;
706
707 switch (Iterator().Peek()) {
708 case LEX_CHAR_LESS_THAN: {
709 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT;
710 Iterator().Forward(1);
711
712 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
713 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT_EQUAL;
714 Iterator().Forward(1);
715 }
716 break;
717 }
718 case LEX_CHAR_EQUALS: {
719 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN_EQUAL;
720 Iterator().Forward(1);
721 break;
722 }
723 default: {
724 break;
725 }
726 }
727 }
728
ScanGreaterThanPunctuator()729 void Lexer::ScanGreaterThanPunctuator()
730 {
731 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN;
732
733 switch (Iterator().Peek()) {
734 case LEX_CHAR_GREATER_THAN: {
735 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT;
736 Iterator().Forward(1);
737
738 switch (Iterator().Peek()) {
739 case LEX_CHAR_GREATER_THAN: {
740 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT;
741 Iterator().Forward(1);
742
743 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
744 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT_EQUAL;
745 Iterator().Forward(1);
746 }
747 break;
748 }
749 case LEX_CHAR_EQUALS: {
750 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT_EQUAL;
751 Iterator().Forward(1);
752 break;
753 }
754 default: {
755 break;
756 }
757 }
758 break;
759 }
760 case LEX_CHAR_EQUALS: {
761 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN_EQUAL;
762 Iterator().Forward(1);
763 break;
764 }
765 default: {
766 break;
767 }
768 }
769 }
770
ScanEqualsPunctuator()771 void Lexer::ScanEqualsPunctuator()
772 {
773 GetToken().type_ = TokenType::PUNCTUATOR_SUBSTITUTION;
774
775 switch (Iterator().Peek()) {
776 case LEX_CHAR_EQUALS: {
777 GetToken().type_ = TokenType::PUNCTUATOR_EQUAL;
778 Iterator().Forward(1);
779
780 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
781 GetToken().type_ = TokenType::PUNCTUATOR_STRICT_EQUAL;
782 Iterator().Forward(1);
783 }
784 break;
785 }
786 case LEX_CHAR_GREATER_THAN: {
787 GetToken().type_ = TokenType::PUNCTUATOR_ARROW;
788 Iterator().Forward(1);
789 break;
790 }
791 default: {
792 break;
793 }
794 }
795 }
796
ScanExclamationPunctuator()797 void Lexer::ScanExclamationPunctuator()
798 {
799 GetToken().type_ = TokenType::PUNCTUATOR_EXCLAMATION_MARK;
800
801 switch (Iterator().Peek()) {
802 case LEX_CHAR_EQUALS: {
803 GetToken().type_ = TokenType::PUNCTUATOR_NOT_EQUAL;
804 Iterator().Forward(1);
805
806 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
807 GetToken().type_ = TokenType::PUNCTUATOR_NOT_STRICT_EQUAL;
808 Iterator().Forward(1);
809 }
810 break;
811 }
812 default: {
813 break;
814 }
815 }
816 }
817
ScanAmpersandPunctuator()818 void Lexer::ScanAmpersandPunctuator()
819 {
820 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND;
821
822 switch (Iterator().Peek()) {
823 case LEX_CHAR_AMPERSAND: {
824 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND;
825 Iterator().Forward(1);
826
827 switch (Iterator().Peek()) {
828 case LEX_CHAR_EQUALS: {
829 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND_EQUAL;
830 Iterator().Forward(1);
831 break;
832 }
833 default: {
834 break;
835 }
836 }
837
838 break;
839 }
840 case LEX_CHAR_EQUALS: {
841 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND_EQUAL;
842 Iterator().Forward(1);
843 break;
844 }
845 default: {
846 break;
847 }
848 }
849 }
850
ScanVLinePunctuator()851 void Lexer::ScanVLinePunctuator()
852 {
853 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR;
854
855 switch (Iterator().Peek()) {
856 case LEX_CHAR_VLINE: {
857 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR;
858 Iterator().Forward(1);
859
860 switch (Iterator().Peek()) {
861 case LEX_CHAR_EQUALS: {
862 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR_EQUAL;
863 Iterator().Forward(1);
864 break;
865 }
866 default: {
867 break;
868 }
869 }
870
871 break;
872 }
873 case LEX_CHAR_EQUALS: {
874 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR_EQUAL;
875 Iterator().Forward(1);
876 break;
877 }
878 default: {
879 break;
880 }
881 }
882 }
883
ScanCircumflexPunctuator()884 void Lexer::ScanCircumflexPunctuator()
885 {
886 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR;
887
888 switch (Iterator().Peek()) {
889 case LEX_CHAR_EQUALS: {
890 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR_EQUAL;
891 Iterator().Forward(1);
892 break;
893 }
894 default: {
895 break;
896 }
897 }
898 }
899
ScanPlusPunctuator()900 void Lexer::ScanPlusPunctuator()
901 {
902 GetToken().type_ = TokenType::PUNCTUATOR_PLUS;
903
904 switch (Iterator().Peek()) {
905 case LEX_CHAR_PLUS: {
906 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_PLUS;
907 Iterator().Forward(1);
908 break;
909 }
910 case LEX_CHAR_EQUALS: {
911 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_EQUAL;
912 Iterator().Forward(1);
913 break;
914 }
915 default: {
916 break;
917 }
918 }
919 }
920
ScanMinusPunctuator()921 void Lexer::ScanMinusPunctuator()
922 {
923 GetToken().type_ = TokenType::PUNCTUATOR_MINUS;
924
925 switch (Iterator().Peek()) {
926 case LEX_CHAR_MINUS: {
927 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_MINUS;
928 Iterator().Forward(1);
929 break;
930 }
931 case LEX_CHAR_EQUALS: {
932 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_EQUAL;
933 Iterator().Forward(1);
934 break;
935 }
936 default: {
937 break;
938 }
939 }
940 }
941
ScanSlashPunctuator()942 void Lexer::ScanSlashPunctuator()
943 {
944 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE;
945
946 switch (Iterator().Peek()) {
947 case LEX_CHAR_EQUALS: {
948 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE_EQUAL;
949 Iterator().Forward(1);
950 break;
951 }
952 default: {
953 break;
954 }
955 }
956 }
957
ScanDotPunctuator()958 void Lexer::ScanDotPunctuator()
959 {
960 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD;
961
962 switch (Iterator().Peek()) {
963 case LEX_CHAR_0:
964 case LEX_CHAR_1:
965 case LEX_CHAR_2:
966 case LEX_CHAR_3:
967 case LEX_CHAR_4:
968 case LEX_CHAR_5:
969 case LEX_CHAR_6:
970 case LEX_CHAR_7:
971 case LEX_CHAR_8:
972 case LEX_CHAR_9: {
973 ScanNumber();
974 break;
975 }
976 case LEX_CHAR_QUESTION: {
977 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_QUESTION;
978 Iterator().Forward(1);
979 break;
980 }
981 case LEX_CHAR_DOT: {
982 Iterator().Forward(1);
983
984 if (Iterator().Peek() == LEX_CHAR_DOT) {
985 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_PERIOD_PERIOD;
986 Iterator().Forward(1);
987 break;
988 }
989
990 Iterator().Backward(1);
991 break;
992 }
993 default: {
994 break;
995 }
996 }
997 }
998
ScanAsterixPunctuator()999 void Lexer::ScanAsterixPunctuator()
1000 {
1001 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY;
1002
1003 switch (Iterator().Peek()) {
1004 case LEX_CHAR_ASTERISK: {
1005 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION;
1006 Iterator().Forward(1);
1007
1008 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
1009 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION_EQUAL;
1010 Iterator().Forward(1);
1011 }
1012 break;
1013 }
1014 case LEX_CHAR_EQUALS: {
1015 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY_EQUAL;
1016 Iterator().Forward(1);
1017 break;
1018 }
1019 default: {
1020 break;
1021 }
1022 }
1023 }
1024
ScanPercentPunctuator()1025 void Lexer::ScanPercentPunctuator()
1026 {
1027 GetToken().type_ = TokenType::PUNCTUATOR_MOD;
1028
1029 switch (Iterator().Peek()) {
1030 case LEX_CHAR_EQUALS: {
1031 GetToken().type_ = TokenType::PUNCTUATOR_MOD_EQUAL;
1032 Iterator().Forward(1);
1033 break;
1034 }
1035 default: {
1036 break;
1037 }
1038 }
1039 }
1040
IsLineTerminatorOrEos() const1041 bool Lexer::IsLineTerminatorOrEos() const
1042 {
1043 switch (Iterator().PeekCp()) {
1044 case util::StringView::Iterator::INVALID_CP:
1045 case LEX_CHAR_LF:
1046 case LEX_CHAR_CR:
1047 case LEX_CHAR_LS:
1048 case LEX_CHAR_PS: {
1049 return true;
1050 }
1051 default: {
1052 break;
1053 }
1054 }
1055
1056 return false;
1057 }
1058
// Advances the iterator over a regular-expression pattern and returns with the iterator on the
// terminating '/', which is left unconsumed. A '/' inside a character class ("[...]") does not
// terminate the pattern. A line terminator or INVALID_CP raises "Unterminated RegExp".
void Lexer::ScanRegExpPattern()
{
    // True while scanning between '[' and ']'.
    bool isCharClass = false;
    size_t cpSize {};

    while (true) {
        switch (Iterator().PeekCp(&cpSize)) {
            case util::StringView::Iterator::INVALID_CP:
            case LEX_CHAR_LF:
            case LEX_CHAR_CR:
            case LEX_CHAR_LS:
            case LEX_CHAR_PS: {
                ThrowError("Unterminated RegExp");
                break;
            }
            case LEX_CHAR_SLASH: {
                if (!isCharClass) {
                    return;
                }

                break;
            }
            case LEX_CHAR_LEFT_SQUARE: {
                isCharClass = true;
                break;
            }
            case LEX_CHAR_RIGHT_SQUARE: {
                isCharClass = false;
                break;
            }
            case LEX_CHAR_BACKSLASH: {
                Iterator().Forward(1);

                // Leave a line terminator / end of input for the next iteration, which reports it.
                if (IsLineTerminatorOrEos()) {
                    continue;
                }

                // Skip the escaped code point, whatever it is.
                Iterator().PeekCp(&cpSize);
                Iterator().Forward(cpSize);
                continue;
            }
            default: {
                break;
            }
        }

        Iterator().Forward(cpSize);
    }
}
1108
GetRegExpFlag(char32_t cp,RegExpFlags & flag)1109 bool Lexer::GetRegExpFlag(char32_t cp, RegExpFlags &flag)
1110 {
1111 switch (cp) {
1112 case LEX_CHAR_LOWERCASE_G: {
1113 flag = RegExpFlags::GLOBAL;
1114 break;
1115 }
1116 case LEX_CHAR_LOWERCASE_I: {
1117 flag = RegExpFlags::IGNORE_CASE;
1118 break;
1119 }
1120 case LEX_CHAR_LOWERCASE_M: {
1121 flag = RegExpFlags::MULTILINE;
1122 break;
1123 }
1124 case LEX_CHAR_LOWERCASE_S: {
1125 flag = RegExpFlags::DOTALL;
1126 break;
1127 }
1128 case LEX_CHAR_LOWERCASE_U: {
1129 flag = RegExpFlags::UNICODE;
1130 break;
1131 }
1132 case LEX_CHAR_LOWERCASE_Y: {
1133 flag = RegExpFlags::STICKY;
1134 break;
1135 }
1136 case LEX_CHAR_LOWERCASE_D: {
1137 flag = RegExpFlags::HAS_INDICES;
1138 break;
1139 }
1140 default: {
1141 return false;
1142 }
1143 }
1144 return true;
1145 }
1146
ScanRegExpFlags()1147 RegExpFlags Lexer::ScanRegExpFlags()
1148 {
1149 RegExpFlags resultFlags = RegExpFlags::EMPTY;
1150
1151 while (true) {
1152 size_t cpSize {};
1153 auto cp = Iterator().PeekCp(&cpSize);
1154 if (!KeywordsUtil::IsIdentifierPart(cp)) {
1155 break;
1156 }
1157
1158 Iterator().Forward(cpSize);
1159
1160 RegExpFlags flag = RegExpFlags::EMPTY;
1161
1162 if (!GetRegExpFlag(cp, flag)) {
1163 if (cp == LEX_CHAR_SP) {
1164 return resultFlags;
1165 } else {
1166 ThrowError("Invalid RegExp flag");
1167 }
1168 }
1169
1170 if (flag == RegExpFlags::EMPTY || (resultFlags & flag) != 0) {
1171 ThrowError("Invalid RegExp flag");
1172 }
1173
1174 resultFlags = resultFlags | flag;
1175 }
1176
1177 return resultFlags;
1178 }
1179
ScanRegExp()1180 RegExp Lexer::ScanRegExp()
1181 {
1182 GetToken().type_ = TokenType::LITERAL_REGEXP;
1183
1184 const auto patternStart = Iterator().Index();
1185 ScanRegExpPattern();
1186 const auto pattern = SourceView(patternStart, Iterator().Index());
1187
1188 ASSERT(Iterator().Peek() == LEX_CHAR_SLASH);
1189 Iterator().Forward(1);
1190
1191 const auto flagsStart = Iterator().Index();
1192 RegExpFlags resultFlags = ScanRegExpFlags();
1193 const auto flags = SourceView(flagsStart, Iterator().Index());
1194
1195 SkipWhiteSpaces();
1196 SetTokenEnd();
1197
1198 return {pattern, flags, resultFlags};
1199 }
1200
CheckArrow()1201 bool Lexer::CheckArrow()
1202 {
1203 if (Iterator().Peek() != LEX_CHAR_EQUALS) {
1204 return false;
1205 }
1206 Iterator().Forward(1);
1207
1208 bool res = Iterator().Peek() == LEX_CHAR_GREATER_THAN;
1209 Iterator().Backward(1);
1210
1211 return res;
1212 }
1213
SetTokenStart()1214 void Lexer::SetTokenStart()
1215 {
1216 if (pos_.nextTokenLine != 0) {
1217 pos_.line += pos_.nextTokenLine;
1218 pos_.nextTokenLine = 0;
1219 GetToken().flags_ = TokenFlags::NEW_LINE;
1220 } else {
1221 GetToken().flags_ = TokenFlags::NONE;
1222 }
1223
1224 pos_.token.loc_.start = SourcePosition {Iterator().Index(), pos_.line};
1225 }
1226
SetTokenEnd()1227 void Lexer::SetTokenEnd()
1228 {
1229 pos_.token.loc_.end = SourcePosition {Iterator().Index(), pos_.line};
1230 }
1231
CheckAwaitKeyword()1232 void Lexer::CheckAwaitKeyword()
1233 {
1234 if (parserContext_->IsStaticBlock()) {
1235 ThrowError("'await' is not allowed in class static block");
1236 }
1237 // support top level await for module
1238 if (!parserContext_->IsAsync()) {
1239 if (!parserContext_->IsModule() || parserContext_->GetProgram()->IsDtsFile()) {
1240 GetToken().type_ = TokenType::LITERAL_IDENT;
1241 return;
1242 }
1243 if (parserContext_->GetProgram()->Extension() == ScriptExtension::TS) {
1244 if (parserContext_->IsTsModule()) {
1245 GetToken().type_ = TokenType::LITERAL_IDENT;
1246 return;
1247 }
1248 }
1249 }
1250
1251 if (parserContext_->DisallowAwait()) {
1252 ThrowError("'await' is not allowed");
1253 }
1254 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1255 ThrowError("Keyword must not contain escaped characters");
1256 }
1257 GetToken().type_ = TokenType::KEYW_AWAIT;
1258 }
1259
CheckArgumentsKeyword()1260 void Lexer::CheckArgumentsKeyword()
1261 {
1262 if (parserContext_->DisallowArguments()) {
1263 ThrowError("'arguments' is not allowed in static block and field initializer");
1264 }
1265 }
1266
CheckKeywordEscape(TokenType type)1267 void Lexer::CheckKeywordEscape(TokenType type)
1268 {
1269 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1270 ThrowError("Escape sequences are not allowed in keywords");
1271 }
1272
1273 GetToken().type_ = type;
1274 }
1275
CheckEnumKeyword()1276 void Lexer::CheckEnumKeyword()
1277 {
1278 if (parserContext_->GetProgram()->Extension() == ScriptExtension::JS) {
1279 ThrowError("Unexpected reserved keyword");
1280 }
1281
1282 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1283 ThrowError("Escape sequences are not allowed in keywords");
1284 }
1285
1286 GetToken().type_ = TokenType::LITERAL_IDENT;
1287 }
1288
CheckLetKeyword()1289 void Lexer::CheckLetKeyword()
1290 {
1291 GetToken().type_ = TokenType::KEYW_LET;
1292 }
1293
CheckYieldKeyword()1294 void Lexer::CheckYieldKeyword()
1295 {
1296 if (!parserContext_->AllowYield()) {
1297 ThrowError("'yield' is not allowed");
1298 }
1299
1300 GetToken().type_ = TokenType::KEYW_YIELD;
1301 }
1302
CheckFutureReservedKeyword(TokenType keywordType)1303 void Lexer::CheckFutureReservedKeyword(TokenType keywordType)
1304 {
1305 GetToken().type_ = TokenType::LITERAL_IDENT;
1306
1307 if (parserContext_->GetProgram()->Extension() == ScriptExtension::TS && keywordType <= TokenType::KEYW_INTERFACE) {
1308 return;
1309 }
1310
1311 ThrowError("Unexpected strict mode reserved keyword");
1312 }
1313
// Advances the iterator past whitespace, line terminators and comments, stopping at the first
// character that starts something else. Every line terminator skipped increments
// pos_.nextTokenLine. A "#!" comment is only accepted when the '#' is at index 0;
// anywhere else it raises a syntax error.
void Lexer::SkipWhiteSpaces()
{
    while (true) {
        auto cp = Iterator().Peek();

        switch (cp) {
            case LEX_CHAR_HASH_MARK: {
                Iterator().Forward(1);
                cp = Iterator().Peek();
                // A '#' that does not start "#!" is left for the token scanner.
                if (cp != LEX_CHAR_EXCLAMATION) {
                    Iterator().Backward(1);
                    return;
                }
                // The '#' has been consumed, so index 1 means it was at the very start of the source.
                if (Iterator().Index() != 1) {
                    /*
                     * According to ECMA-262 section 12.5, Hashbang Comments are location-sensitive:
                     * they are only allowed at the beginning of a file; any other position is illegal.
                     */
                    Iterator().Backward(1);
                    ThrowError("Invalid or unexpected token");
                }

                Iterator().Forward(1);
                SkipSingleLineComment();
                continue;
            }
            case LEX_CHAR_CR: {
                // For CR LF advance onto the LF; for a lone CR step back so the code below consumes it.
                Iterator().Forward(1);

                if (Iterator().Peek() != LEX_CHAR_LF) {
                    Iterator().Backward(1);
                }

                [[fallthrough]];
            }
            case LEX_CHAR_LF: {
                Iterator().Forward(1);
                pos_.nextTokenLine++;
                continue;
            }
            case LEX_CHAR_VT:
            case LEX_CHAR_FF:
            case LEX_CHAR_SP:
            case LEX_CHAR_TAB: {
                Iterator().Forward(1);
                continue;
            }
            case LEX_CHAR_SLASH: {
                Iterator().Forward(1);
                cp = Iterator().Peek();
                if (cp == LEX_CHAR_SLASH) {
                    Iterator().Forward(1);
                    SkipSingleLineComment();
                    continue;
                }
                if (cp == LEX_CHAR_ASTERISK) {
                    Iterator().Forward(1);
                    SkipMultiLineComment();
                    continue;
                }

                // A '/' that does not start a comment is left for the token scanner.
                Iterator().Backward(1);
                return;
            }
            default: {
                // Values below LEX_ASCII_MAX_BITS that were not handled above end the skipping.
                if (cp < LEX_ASCII_MAX_BITS) {
                    return;
                }

                // Otherwise decode the full code point to check for non-ASCII whitespace.
                size_t cpSize {};
                cp = Iterator().PeekCp(&cpSize);

                switch (cp) {
                    case LEX_CHAR_LS:
                    case LEX_CHAR_PS: {
                        pos_.nextTokenLine++;
                        [[fallthrough]];
                    }
                    case LEX_CHAR_NBSP:
                    case LEX_CHAR_NLINE:
                    case LEX_CHAR_IGSP:
                    case LEX_CHAR_ZWNBSP: {
                        Iterator().Forward(cpSize);
                        continue;
                    }
                    default: {
                        return;
                    }
                }
            }
        }
    }
}
1407
1408 // NOLINTNEXTLINE(readability-function-size)
NextToken(LexerNextTokenFlags flags)1409 void Lexer::NextToken(LexerNextTokenFlags flags)
1410 {
1411 Keywords kws(this, flags);
1412 KeywordsUtil &kwu = kws.Util();
1413
1414 SetTokenStart();
1415
1416 auto cp = Iterator().Peek();
1417 Iterator().Forward(1);
1418
1419 GetToken().keywordType_ = TokenType::EOS;
1420
1421 switch (cp) {
1422 case LEX_CHAR_EXCLAMATION: {
1423 ScanExclamationPunctuator();
1424 break;
1425 }
1426 case LEX_CHAR_SINGLE_QUOTE: {
1427 ScanString<LEX_CHAR_SINGLE_QUOTE>();
1428 break;
1429 }
1430 case LEX_CHAR_DOUBLE_QUOTE: {
1431 ScanString<LEX_CHAR_DOUBLE_QUOTE>();
1432 break;
1433 }
1434 case LEX_CHAR_HASH_MARK: {
1435 GetToken().type_ = TokenType::PUNCTUATOR_HASH_MARK;
1436 break;
1437 }
1438 case LEX_CHAR_PERCENT: {
1439 ScanPercentPunctuator();
1440 break;
1441 }
1442 case LEX_CHAR_AMPERSAND: {
1443 ScanAmpersandPunctuator();
1444 break;
1445 }
1446 case LEX_CHAR_LEFT_PAREN: {
1447 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_PARENTHESIS;
1448 break;
1449 }
1450 case LEX_CHAR_RIGHT_PAREN: {
1451 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_PARENTHESIS;
1452 break;
1453 }
1454 case LEX_CHAR_ASTERISK: {
1455 ScanAsterixPunctuator();
1456 break;
1457 }
1458 case LEX_CHAR_PLUS: {
1459 ScanPlusPunctuator();
1460 break;
1461 }
1462 case LEX_CHAR_COMMA: {
1463 GetToken().type_ = TokenType::PUNCTUATOR_COMMA;
1464 break;
1465 }
1466 case LEX_CHAR_MINUS: {
1467 ScanMinusPunctuator();
1468 break;
1469 }
1470 case LEX_CHAR_DOT: {
1471 ScanDotPunctuator();
1472 break;
1473 }
1474 case LEX_CHAR_SLASH: {
1475 ScanSlashPunctuator();
1476 break;
1477 }
1478 case LEX_CHAR_0: {
1479 ScanNumberLeadingZero();
1480 break;
1481 }
1482 case LEX_CHAR_1:
1483 case LEX_CHAR_2:
1484 case LEX_CHAR_3:
1485 case LEX_CHAR_4:
1486 case LEX_CHAR_5:
1487 case LEX_CHAR_6:
1488 case LEX_CHAR_7:
1489 case LEX_CHAR_8:
1490 case LEX_CHAR_9: {
1491 ScanNumber();
1492 break;
1493 }
1494 case LEX_CHAR_COLON: {
1495 GetToken().type_ = TokenType::PUNCTUATOR_COLON;
1496 break;
1497 }
1498 case LEX_CHAR_SEMICOLON: {
1499 GetToken().type_ = TokenType::PUNCTUATOR_SEMI_COLON;
1500 break;
1501 }
1502 case LEX_CHAR_LESS_THAN: {
1503 ScanLessThanPunctuator();
1504 break;
1505 }
1506 case LEX_CHAR_EQUALS: {
1507 ScanEqualsPunctuator();
1508 break;
1509 }
1510 case LEX_CHAR_GREATER_THAN: {
1511 ScanGreaterThanPunctuator();
1512 break;
1513 }
1514 case LEX_CHAR_QUESTION: {
1515 ScanQuestionPunctuator();
1516 break;
1517 }
1518 case LEX_CHAR_AT: {
1519 GetToken().type_ = TokenType::PUNCTUATOR_AT;
1520 break;
1521 }
1522 case LEX_CHAR_DOLLAR_SIGN:
1523 case LEX_CHAR_UPPERCASE_A:
1524 case LEX_CHAR_UPPERCASE_B:
1525 case LEX_CHAR_UPPERCASE_C:
1526 case LEX_CHAR_UPPERCASE_D:
1527 case LEX_CHAR_UPPERCASE_E:
1528 case LEX_CHAR_UPPERCASE_F:
1529 case LEX_CHAR_UPPERCASE_G:
1530 case LEX_CHAR_UPPERCASE_H:
1531 case LEX_CHAR_UPPERCASE_I:
1532 case LEX_CHAR_UPPERCASE_J:
1533 case LEX_CHAR_UPPERCASE_K:
1534 case LEX_CHAR_UPPERCASE_L:
1535 case LEX_CHAR_UPPERCASE_M:
1536 case LEX_CHAR_UPPERCASE_N:
1537 case LEX_CHAR_UPPERCASE_O:
1538 case LEX_CHAR_UPPERCASE_P:
1539 case LEX_CHAR_UPPERCASE_Q:
1540 case LEX_CHAR_UPPERCASE_R:
1541 case LEX_CHAR_UPPERCASE_S:
1542 case LEX_CHAR_UPPERCASE_T:
1543 case LEX_CHAR_UPPERCASE_U:
1544 case LEX_CHAR_UPPERCASE_V:
1545 case LEX_CHAR_UPPERCASE_W:
1546 case LEX_CHAR_UPPERCASE_X:
1547 case LEX_CHAR_UPPERCASE_Y:
1548 case LEX_CHAR_UPPERCASE_Z:
1549 case LEX_CHAR_UNDERSCORE:
1550 case LEX_CHAR_LOWERCASE_H:
1551 case LEX_CHAR_LOWERCASE_J:
1552 case LEX_CHAR_LOWERCASE_Q:
1553 case LEX_CHAR_LOWERCASE_X:
1554 case LEX_CHAR_LOWERCASE_Z: {
1555 kwu.ScanIdContinue();
1556 break;
1557 }
1558 case LEX_CHAR_LEFT_SQUARE: {
1559 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SQUARE_BRACKET;
1560 break;
1561 }
1562 case LEX_CHAR_BACKSLASH: {
1563 GetToken().flags_ |= TokenFlags::HAS_ESCAPE;
1564
1565 if (Iterator().Peek() != LEX_CHAR_LOWERCASE_U) {
1566 ThrowError("Invalid character");
1567 }
1568
1569 cp = ScanUnicodeEscapeSequence();
1570
1571 kwu.ScanIdentifierStart(cp);
1572 break;
1573 }
1574 case LEX_CHAR_RIGHT_SQUARE: {
1575 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SQUARE_BRACKET;
1576 break;
1577 }
1578 case LEX_CHAR_CIRCUMFLEX: {
1579 ScanCircumflexPunctuator();
1580 break;
1581 }
1582 case LEX_CHAR_BACK_TICK: {
1583 GetToken().type_ = TokenType::PUNCTUATOR_BACK_TICK;
1584 SetTokenEnd();
1585 return;
1586 }
1587 case LEX_CHAR_LOWERCASE_A: {
1588 kws.ScanA();
1589 break;
1590 }
1591 case LEX_CHAR_LOWERCASE_B: {
1592 kws.ScanB();
1593 break;
1594 }
1595 case LEX_CHAR_LOWERCASE_C: {
1596 kws.ScanC();
1597 break;
1598 }
1599 case LEX_CHAR_LOWERCASE_D: {
1600 kws.ScanD();
1601 break;
1602 }
1603 case LEX_CHAR_LOWERCASE_E: {
1604 kws.ScanE();
1605 break;
1606 }
1607 case LEX_CHAR_LOWERCASE_F: {
1608 kws.ScanF();
1609 break;
1610 }
1611 case LEX_CHAR_LOWERCASE_G: {
1612 kws.ScanG();
1613 break;
1614 }
1615 case LEX_CHAR_LOWERCASE_I: {
1616 kws.ScanI();
1617 break;
1618 }
1619 case LEX_CHAR_LOWERCASE_K: {
1620 kws.ScanK();
1621 break;
1622 }
1623 case LEX_CHAR_LOWERCASE_L: {
1624 kws.ScanL();
1625 break;
1626 }
1627 case LEX_CHAR_LOWERCASE_M: {
1628 kws.ScanM();
1629 break;
1630 }
1631 case LEX_CHAR_LOWERCASE_N: {
1632 kws.ScanN();
1633 break;
1634 }
1635 case LEX_CHAR_LOWERCASE_O: {
1636 kws.ScanO();
1637 break;
1638 }
1639 case LEX_CHAR_LOWERCASE_P: {
1640 kws.ScanP();
1641 break;
1642 }
1643 case LEX_CHAR_LOWERCASE_R: {
1644 kws.ScanR();
1645 break;
1646 }
1647 case LEX_CHAR_LOWERCASE_S: {
1648 kws.ScanS();
1649 break;
1650 }
1651 case LEX_CHAR_LOWERCASE_T: {
1652 kws.ScanT();
1653 break;
1654 }
1655 case LEX_CHAR_LOWERCASE_U: {
1656 kws.ScanU();
1657 break;
1658 }
1659 case LEX_CHAR_LOWERCASE_V: {
1660 kws.ScanV();
1661 break;
1662 }
1663 case LEX_CHAR_LOWERCASE_W: {
1664 kws.ScanW();
1665 break;
1666 }
1667 case LEX_CHAR_LOWERCASE_Y: {
1668 kws.ScanY();
1669 break;
1670 }
1671 case LEX_CHAR_LEFT_BRACE: {
1672 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_BRACE;
1673
1674 if (tlCtx_) {
1675 tlCtx_->ConsumeLeftBrace();
1676 }
1677
1678 break;
1679 }
1680 case LEX_CHAR_VLINE: {
1681 ScanVLinePunctuator();
1682 break;
1683 }
1684 case LEX_CHAR_RIGHT_BRACE: {
1685 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_BRACE;
1686
1687 if (tlCtx_ && tlCtx_->ConsumeRightBrace()) {
1688 SetTokenEnd();
1689 return;
1690 }
1691
1692 break;
1693 }
1694 case LEX_CHAR_TILDE: {
1695 GetToken().type_ = TokenType::PUNCTUATOR_TILDE;
1696 break;
1697 }
1698 default: {
1699 Iterator().Backward(1);
1700
1701 if (cp == util::StringView::Iterator::INVALID_CP) {
1702 GetToken().type_ = TokenType::EOS;
1703 break;
1704 }
1705
1706 cp = Iterator().Next();
1707 kwu.ScanIdentifierStart(cp);
1708 break;
1709 }
1710 }
1711
1712 SetTokenEnd();
1713 SkipWhiteSpaces();
1714 }
1715
AssignTokenEscapeError()1716 void Lexer::AssignTokenEscapeError()
1717 {
1718 GetToken().flags_ |= TokenFlags::ESCAPE_ERROR;
1719 }
1720
AssignTokenTaggedTemplate()1721 void Lexer::AssignTokenTaggedTemplate()
1722 {
1723 GetToken().flags_ |= TokenFlags::TAGGED_TEMPLATE;
1724 }
1725
CheckTokenIsTaggedTemplate() const1726 bool Lexer::CheckTokenIsTaggedTemplate() const
1727 {
1728 return GetToken().IsTaggedTemplate();
1729 }
1730
1731 } // namespace panda::es2panda::lexer
1732