1 /**
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "lexer.h"
17
18 #include <gen/keywords.h>
19 #include <parser/context/parserContext.h>
20
21 namespace panda::es2panda::lexer {
22
LexerPosition(const util::StringView & source)23 LexerPosition::LexerPosition(const util::StringView &source) : iterator(source) {}
24
Lexer(const parser::ParserContext * parserContext)25 Lexer::Lexer(const parser::ParserContext *parserContext)
26 : allocator_(parserContext->GetProgram()->Allocator()),
27 parserContext_(parserContext),
28 source_(parserContext->GetProgram()->SourceCode()),
29 pos_(source_)
30 {
31 SkipWhiteSpaces();
32 }
33
ScanUnicodeEscapeSequence()34 char32_t Lexer::ScanUnicodeEscapeSequence()
35 {
36 ASSERT(Iterator().Peek() == LEX_CHAR_LOWERCASE_U);
37
38 Iterator().Forward(1);
39
40 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
41 Iterator().Forward(1);
42 return ScanUnicodeCodePointEscape();
43 }
44
45 // 4: a template parameter to the expected fixed length when scanning Unicode escape sequences
46 return ScanHexEscape<4>();
47 }
48
ScanUnicodeCodePointEscape()49 char32_t Lexer::ScanUnicodeCodePointEscape()
50 {
51 double code = 0;
52 char32_t cp {};
53
54 while (true) {
55 cp = Iterator().Peek();
56 if (!IsHexDigit(cp)) {
57 break;
58 }
59
60 Iterator().Forward(1);
61
62 constexpr auto multiplier = 16;
63 code = code * multiplier + HexValue(cp);
64 if (code > UNICODE_CODE_POINT_MAX) {
65 if (CheckTokenIsTaggedTemplate()) {
66 AssignTokenEscapeError();
67 break;
68 }
69 ThrowError("Invalid unicode escape sequence");
70 }
71 }
72 if (cp != LEX_CHAR_RIGHT_BRACE) {
73 if (CheckTokenIsTaggedTemplate()) {
74 AssignTokenEscapeError();
75 return static_cast<char32_t>(code);
76 } else {
77 ThrowError("Invalid unicode escape sequence");
78 }
79 }
80
81 Iterator().Forward(1);
82 return static_cast<char32_t>(code);
83 }
84
Allocator()85 ArenaAllocator *Lexer::Allocator()
86 {
87 return allocator_;
88 }
89
GetToken()90 Token &Lexer::GetToken()
91 {
92 return pos_.token;
93 }
94
GetToken() const95 const Token &Lexer::GetToken() const
96 {
97 return pos_.token;
98 }
99
Line() const100 size_t Lexer::Line() const
101 {
102 return pos_.line;
103 }
104
Save() const105 LexerPosition Lexer::Save() const
106 {
107 return pos_;
108 }
109
BackwardToken(TokenType type,size_t offset)110 void Lexer::BackwardToken(TokenType type, size_t offset)
111 {
112 pos_.token.type_ = type;
113 pos_.iterator.Reset(GetToken().End().index - offset);
114 pos_.nextTokenLine = 0;
115 }
116
ForwardToken(TokenType type,size_t offset)117 void Lexer::ForwardToken(TokenType type, size_t offset)
118 {
119 SetTokenStart();
120 pos_.token.type_ = type;
121 pos_.iterator.Forward(offset);
122 SetTokenEnd();
123 SkipWhiteSpaces();
124 }
125
Rewind(const LexerPosition & pos)126 void Lexer::Rewind(const LexerPosition &pos)
127 {
128 pos_ = pos;
129 }
130
Lookahead()131 char32_t Lexer::Lookahead()
132 {
133 return Iterator().Peek();
134 }
135
// Returns the view of the source delimited by the indices of the two iterators.
util::StringView Lexer::SourceView(const util::StringView::Iterator &begin,
                                   const util::StringView::Iterator &end) const
{
    const auto firstIndex = begin.Index();
    const auto lastIndex = end.Index();
    return SourceView(firstIndex, lastIndex);
}
140
SourceView(size_t begin,size_t end) const141 util::StringView Lexer::SourceView(size_t begin, size_t end) const
142 {
143 return source_.Substr(begin, end);
144 }
145
SkipMultiLineComment()146 void Lexer::SkipMultiLineComment()
147 {
148 while (true) {
149 switch (Iterator().Next()) {
150 case util::StringView::Iterator::INVALID_CP: {
151 ThrowError("Unterminated multi-line comment");
152 break;
153 }
154 case LEX_CHAR_CR: {
155 if (Iterator().Peek() == LEX_CHAR_LF) {
156 Iterator().Forward(1);
157 }
158
159 [[fallthrough]];
160 }
161 case LEX_CHAR_LF:
162 case LEX_CHAR_LS:
163 case LEX_CHAR_PS: {
164 pos_.nextTokenLine++;
165 continue;
166 }
167 case LEX_CHAR_ASTERISK: {
168 if (Iterator().Peek() == LEX_CHAR_SLASH) {
169 Iterator().Forward(1);
170 return;
171 }
172
173 break;
174 }
175 default: {
176 break;
177 }
178 }
179 }
180 }
181
182 /* New line character is not processed */
SkipSingleLineComment()183 void Lexer::SkipSingleLineComment()
184 {
185 while (true) {
186 // INVALID_CP may appear in the middle of a comment
187 // It can not be used to determine the end of the comment.
188 if (!Iterator().HasNext()) {
189 Iterator().Next();
190 pos_.nextTokenLine++;
191 return;
192 }
193 switch (Iterator().Next()) {
194 case util::StringView::Iterator::INVALID_CP: {
195 // This return means if an INVALID_CP appeared in a single comment,
196 // it will terminates single comment,
197 // but INVALID_CP should not terminate single comment,
198 // only the end of the Iterator can terminates single comment and add pos_.nextTokenLine,
199 // this return should be removed in another issure because it is an incompatible bug fix.
200 return;
201 }
202 case LEX_CHAR_CR: {
203 if (Iterator().Peek() == LEX_CHAR_LF) {
204 Iterator().Forward(1);
205 }
206
207 [[fallthrough]];
208 }
209 case LEX_CHAR_LF:
210 case LEX_CHAR_LS:
211 case LEX_CHAR_PS: {
212 pos_.nextTokenLine++;
213 return;
214 }
215 default: {
216 break;
217 }
218 }
219 }
220 }
221
ThrowError(std::string_view message)222 void Lexer::ThrowError(std::string_view message)
223 {
224 lexer::LineIndex lineIndex = parserContext_->GetProgram()->GetLineIndex();
225 SourceLocation loc = lineIndex.GetLocation(SourcePosition(Iterator().Index(), pos_.line + pos_.nextTokenLine));
226 throw es2panda::Error(es2panda::ErrorType::SYNTAX, message, loc.line, loc.col);
227 }
228
CheckNumberLiteralEnd()229 void Lexer::CheckNumberLiteralEnd()
230 {
231 if (Iterator().Peek() == LEX_CHAR_LOWERCASE_N) {
232 Iterator().Forward(1);
233 GetToken().flags_ |= TokenFlags::NUMBER_BIGINT;
234 }
235
236 GetToken().src_ = SourceView(GetToken().Start().index, Iterator().Index());
237 const auto nextCp = Iterator().PeekCp();
238 if (KeywordsUtil::IsIdentifierStart(nextCp) || IsDecimalDigit(nextCp)) {
239 ThrowError("Invalid numeric literal");
240 }
241 }
242
ScanNumberLeadingZero()243 void Lexer::ScanNumberLeadingZero()
244 {
245 GetToken().type_ = TokenType::LITERAL_NUMBER;
246
247 switch (Iterator().Peek()) {
248 case LEX_CHAR_LOWERCASE_X:
249 case LEX_CHAR_UPPERCASE_X: {
250 Iterator().Forward(1);
251 constexpr auto RADIX = 16;
252 ScanNumberRadix<IsHexDigit, RADIX>();
253 CheckNumberLiteralEnd();
254 return;
255 }
256 case LEX_CHAR_LOWERCASE_B:
257 case LEX_CHAR_UPPERCASE_B: {
258 Iterator().Forward(1);
259 constexpr auto RADIX = 2;
260 ScanNumberRadix<IsBinaryDigit, RADIX>();
261 CheckNumberLiteralEnd();
262 return;
263 }
264 case LEX_CHAR_LOWERCASE_O:
265 case LEX_CHAR_UPPERCASE_O: {
266 Iterator().Forward(1);
267 constexpr auto RADIX = 8;
268 ScanNumberRadix<IsOctalDigit, RADIX>();
269
270 switch (Iterator().Peek()) {
271 case LEX_CHAR_8:
272 case LEX_CHAR_9: {
273 ThrowError("Invalid octal digit");
274 break;
275 }
276 default: {
277 break;
278 }
279 }
280
281 CheckNumberLiteralEnd();
282 return;
283 }
284 case LEX_CHAR_0:
285 case LEX_CHAR_1:
286 case LEX_CHAR_2:
287 case LEX_CHAR_3:
288 case LEX_CHAR_4:
289 case LEX_CHAR_5:
290 case LEX_CHAR_6:
291 case LEX_CHAR_7: {
292 ThrowError("Implicit octal literal not allowed");
293 break;
294 }
295 case LEX_CHAR_8:
296 case LEX_CHAR_9: {
297 ThrowError("NonOctalDecimalIntegerLiteral is not enabled in strict mode code");
298 break;
299 }
300 default: {
301 break;
302 }
303 }
304
305 ScanNumber(Iterator().Peek() == LEX_CHAR_0);
306 }
307
ScanDecimalNumbers(bool allowNumericSeparator)308 void Lexer::ScanDecimalNumbers(bool allowNumericSeparator)
309 {
310 bool allowNumericOnNext = true;
311
312 while (true) {
313 switch (Iterator().Peek()) {
314 case LEX_CHAR_0:
315 case LEX_CHAR_1:
316 case LEX_CHAR_2:
317 case LEX_CHAR_3:
318 case LEX_CHAR_4:
319 case LEX_CHAR_5:
320 case LEX_CHAR_6:
321 case LEX_CHAR_7:
322 case LEX_CHAR_8:
323 case LEX_CHAR_9: {
324 Iterator().Forward(1);
325 allowNumericOnNext = true;
326 break;
327 }
328 case LEX_CHAR_UNDERSCORE: {
329 Iterator().Backward(1);
330 isUnderscore_ = true;
331
332 if (Iterator().Peek() == LEX_CHAR_DOT || !allowNumericSeparator || !allowNumericOnNext) {
333 Iterator().Forward(1);
334 ThrowError("Invalid numeric separator");
335 }
336
337 GetToken().flags_ |= TokenFlags::NUMBER_HAS_UNDERSCORE;
338 Iterator().Forward(2);
339 allowNumericOnNext = false;
340 break;
341 }
342 default: {
343 if (!allowNumericOnNext) {
344 ThrowError("Numeric separators are not allowed at the end of numeric literals");
345 }
346 return;
347 }
348 }
349 }
350 }
351
ConvertNumber(size_t exponentSignPos)352 void Lexer::ConvertNumber(size_t exponentSignPos)
353 {
354 util::StringView sv = SourceView(GetToken().Start().index, Iterator().Index());
355 std::string utf8 = std::string {sv.Utf8()};
356 bool needConversion = false;
357
358 if (exponentSignPos != std::numeric_limits<size_t>::max()) {
359 utf8.insert(exponentSignPos, 1, '+');
360 needConversion = true;
361 }
362
363 if (GetToken().flags_ & TokenFlags::NUMBER_HAS_UNDERSCORE) {
364 utf8.erase(std::remove(utf8.begin(), utf8.end(), LEX_CHAR_UNDERSCORE), utf8.end());
365 needConversion = true;
366 }
367
368 if (needConversion) {
369 util::UString converted(utf8, Allocator());
370 GetToken().src_ = converted.View();
371 } else {
372 GetToken().src_ = sv;
373 }
374
375 try {
376 GetToken().number_ = static_cast<double>(std::stold(utf8, nullptr));
377 } catch (const std::invalid_argument &) {
378 ThrowError("Invalid number");
379 } catch (const std::out_of_range &) {
380 // TODO(frobert): look for a more elegant solution to this
381 GetToken().number_ = std::numeric_limits<double>::infinity();
382 }
383 }
ScanNumber(bool allowNumericSeparator,bool allowBigInt)384 void Lexer::ScanNumber(bool allowNumericSeparator, bool allowBigInt)
385 {
386 GetToken().type_ = TokenType::LITERAL_NUMBER;
387
388 ScanDecimalNumbers(allowNumericSeparator);
389
390 size_t exponentSignPos = std::numeric_limits<size_t>::max();
391 bool parseExponent = true;
392
393 if (Iterator().Peek() == LEX_CHAR_DOT) {
394 allowBigInt = false;
395 Iterator().Forward(1);
396
397 auto cp = Iterator().Peek();
398 if (IsDecimalDigit(cp) || cp == LEX_CHAR_LOWERCASE_E || LEX_CHAR_UPPERCASE_E) {
399 ScanDecimalNumbers(allowNumericSeparator);
400 } else {
401 parseExponent = false;
402 }
403 }
404
405 switch (Iterator().Peek()) {
406 case LEX_CHAR_LOWERCASE_E:
407 case LEX_CHAR_UPPERCASE_E: {
408 allowBigInt = false;
409
410 if (!parseExponent) {
411 break;
412 }
413
414 Iterator().Forward(1);
415
416 switch (Iterator().Peek()) {
417 case LEX_CHAR_UNDERSCORE: {
418 break;
419 }
420 case LEX_CHAR_PLUS:
421 case LEX_CHAR_MINUS: {
422 Iterator().Forward(1);
423 break;
424 }
425 default: {
426 exponentSignPos = Iterator().Index() - GetToken().Start().index;
427 break;
428 }
429 }
430
431 if (!IsDecimalDigit(Iterator().Peek())) {
432 ThrowError("Invalid numeric literal");
433 }
434 ScanDecimalNumbers(allowNumericSeparator);
435 break;
436 }
437 default: {
438 break;
439 }
440 }
441
442 CheckNumberLiteralEnd();
443
444 if (GetToken().flags_ & TokenFlags::NUMBER_BIGINT) {
445 if (!allowBigInt) {
446 ThrowError("Invalid BigInt number");
447 }
448 if (isUnderscore_) {
449 ConvertNumber(exponentSignPos);
450 isUnderscore_ = false;
451 }
452
453 return;
454 }
455
456 ConvertNumber(exponentSignPos);
457 }
458
PushTemplateContext(TemplateLiteralParserContext * ctx)459 void Lexer::PushTemplateContext(TemplateLiteralParserContext *ctx)
460 {
461 tlCtx_ = ctx;
462 }
463
ScanTemplateStringEnd()464 void Lexer::ScanTemplateStringEnd()
465 {
466 ASSERT(Iterator().Peek() == LEX_CHAR_BACK_TICK);
467 Iterator().Forward(1);
468 SetTokenEnd();
469 SkipWhiteSpaces();
470 }
471
ScanTemplateString()472 LexerTemplateString Lexer::ScanTemplateString()
473 {
474 LexerTemplateString templateStr(Allocator());
475 size_t cpSize = 0;
476
477 while (true) {
478 char32_t cp = Iterator().PeekCp(&cpSize);
479
480 switch (cp) {
481 case util::StringView::Iterator::INVALID_CP: {
482 ThrowError("Unexpected token, expected '${' or '`'");
483 break;
484 }
485 case LEX_CHAR_BACK_TICK: {
486 templateStr.end = Iterator().Index();
487 return templateStr;
488 }
489 case LEX_CHAR_CR: {
490 Iterator().Forward(1);
491
492 if (Iterator().Peek() != LEX_CHAR_LF) {
493 Iterator().Backward(1);
494 }
495
496 [[fallthrough]];
497 }
498 case LEX_CHAR_LF: {
499 pos_.line++;
500 templateStr.str.Append(LEX_CHAR_LF);
501 Iterator().Forward(1);
502 continue;
503 }
504 case LEX_CHAR_BACKSLASH: {
505 Iterator().Forward(1);
506
507 char32_t nextCp = Iterator().Peek();
508 if (nextCp == LEX_CHAR_BACK_TICK || nextCp == LEX_CHAR_BACKSLASH || nextCp == LEX_CHAR_DOLLAR_SIGN) {
509 templateStr.str.Append(cp);
510 templateStr.str.Append(nextCp);
511 Iterator().Forward(1);
512 continue;
513 }
514
515 Iterator().Backward(1);
516 break;
517 }
518 case LEX_CHAR_DOLLAR_SIGN: {
519 templateStr.end = Iterator().Index();
520 Iterator().Forward(1);
521
522 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
523 Iterator().Forward(1);
524 templateStr.scanExpression = true;
525 SkipWhiteSpaces();
526 return templateStr;
527 }
528
529 templateStr.str.Append(cp);
530 continue;
531 }
532 default: {
533 break;
534 }
535 }
536
537 templateStr.str.Append(cp);
538 Iterator().Forward(cpSize);
539 }
540
541 UNREACHABLE();
542 return templateStr;
543 }
544
ResetTokenEnd()545 void Lexer::ResetTokenEnd()
546 {
547 SetTokenStart();
548 pos_.iterator.Reset(GetToken().End().index);
549 pos_.line = GetToken().End().line;
550 pos_.nextTokenLine = 0;
551 }
552
ScanStringUnicodePart(util::UString * str)553 void Lexer::ScanStringUnicodePart(util::UString *str)
554 {
555 size_t cpSize {};
556 char32_t cp = Iterator().PeekCp(&cpSize);
557
558 switch (cp) {
559 case util::StringView::Iterator::INVALID_CP: {
560 ThrowError("Unterminated string");
561 break;
562 }
563 case LEX_CHAR_CR: {
564 Iterator().Forward(1);
565 if (Iterator().Peek() != LEX_CHAR_LF) {
566 Iterator().Backward(1);
567 }
568
569 [[fallthrough]];
570 }
571 case LEX_CHAR_LS:
572 case LEX_CHAR_PS:
573 case LEX_CHAR_LF: {
574 pos_.line++;
575 Iterator().Forward(cpSize);
576 return;
577 }
578 case LEX_CHAR_LOWERCASE_B: {
579 cp = LEX_CHAR_BS;
580 break;
581 }
582 case LEX_CHAR_LOWERCASE_T: {
583 cp = LEX_CHAR_TAB;
584 break;
585 }
586 case LEX_CHAR_LOWERCASE_N: {
587 cp = LEX_CHAR_LF;
588 break;
589 }
590 case LEX_CHAR_LOWERCASE_V: {
591 cp = LEX_CHAR_VT;
592 break;
593 }
594 case LEX_CHAR_LOWERCASE_F: {
595 cp = LEX_CHAR_FF;
596 break;
597 }
598 case LEX_CHAR_LOWERCASE_R: {
599 cp = LEX_CHAR_CR;
600 break;
601 }
602 case LEX_CHAR_LOWERCASE_X: {
603 Iterator().Forward(1);
604 // 2: a template parameter to the expected fixed length when scanning Unicode escape sequences
605 str->Append(ScanHexEscape<2>());
606 return;
607 }
608 case LEX_CHAR_LOWERCASE_U: {
609 cp = ScanUnicodeEscapeSequence();
610 str->Append(cp);
611 return;
612 }
613 case LEX_CHAR_0: {
614 Iterator().Forward(1);
615 bool isDecimal = IsDecimalDigit(Iterator().Peek());
616 bool isOctal = IsOctalDigit(Iterator().Peek());
617 Iterator().Backward(1);
618
619 if (!isDecimal) {
620 cp = LEX_CHAR_NULL;
621 break;
622 }
623
624 if (isOctal) {
625 if (CheckTokenIsTaggedTemplate()) {
626 AssignTokenEscapeError();
627 break;
628 }
629 ThrowError("Octal escape sequences are not allowed in strict mode");
630 }
631
632 [[fallthrough]];
633 }
634 default: {
635 if (IsDecimalDigit(Iterator().Peek())) {
636 if (CheckTokenIsTaggedTemplate()) {
637 AssignTokenEscapeError();
638 break;
639 }
640 ThrowError("Invalid character escape sequence in strict mode");
641 }
642
643 break;
644 }
645 }
646
647 Iterator().Forward(cpSize);
648 str->Append(cp);
649 }
650
ScanQuestionPunctuator()651 void Lexer::ScanQuestionPunctuator()
652 {
653 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_MARK;
654
655 switch (Iterator().Peek()) {
656 case LEX_CHAR_QUESTION: {
657 GetToken().type_ = TokenType::PUNCTUATOR_NULLISH_COALESCING;
658 Iterator().Forward(1);
659
660 switch (Iterator().Peek()) {
661 case LEX_CHAR_EQUALS: {
662 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_NULLISH_EQUAL;
663 Iterator().Forward(1);
664 break;
665 }
666 default: {
667 break;
668 }
669 }
670
671 break;
672 }
673 case LEX_CHAR_DOT: {
674 Iterator().Forward(1);
675
676 if (!IsDecimalDigit(Iterator().Peek())) {
677 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_DOT;
678 return;
679 }
680
681 Iterator().Backward(1);
682 break;
683 }
684 default: {
685 break;
686 }
687 }
688 }
689
ScanLessThanPunctuator()690 void Lexer::ScanLessThanPunctuator()
691 {
692 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN;
693
694 switch (Iterator().Peek()) {
695 case LEX_CHAR_LESS_THAN: {
696 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT;
697 Iterator().Forward(1);
698
699 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
700 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT_EQUAL;
701 Iterator().Forward(1);
702 }
703 break;
704 }
705 case LEX_CHAR_EQUALS: {
706 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN_EQUAL;
707 Iterator().Forward(1);
708 break;
709 }
710 default: {
711 break;
712 }
713 }
714 }
715
ScanGreaterThanPunctuator()716 void Lexer::ScanGreaterThanPunctuator()
717 {
718 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN;
719
720 switch (Iterator().Peek()) {
721 case LEX_CHAR_GREATER_THAN: {
722 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT;
723 Iterator().Forward(1);
724
725 switch (Iterator().Peek()) {
726 case LEX_CHAR_GREATER_THAN: {
727 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT;
728 Iterator().Forward(1);
729
730 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
731 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT_EQUAL;
732 Iterator().Forward(1);
733 }
734 break;
735 }
736 case LEX_CHAR_EQUALS: {
737 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT_EQUAL;
738 Iterator().Forward(1);
739 break;
740 }
741 default: {
742 break;
743 }
744 }
745 break;
746 }
747 case LEX_CHAR_EQUALS: {
748 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN_EQUAL;
749 Iterator().Forward(1);
750 break;
751 }
752 default: {
753 break;
754 }
755 }
756 }
757
ScanEqualsPunctuator()758 void Lexer::ScanEqualsPunctuator()
759 {
760 GetToken().type_ = TokenType::PUNCTUATOR_SUBSTITUTION;
761
762 switch (Iterator().Peek()) {
763 case LEX_CHAR_EQUALS: {
764 GetToken().type_ = TokenType::PUNCTUATOR_EQUAL;
765 Iterator().Forward(1);
766
767 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
768 GetToken().type_ = TokenType::PUNCTUATOR_STRICT_EQUAL;
769 Iterator().Forward(1);
770 }
771 break;
772 }
773 case LEX_CHAR_GREATER_THAN: {
774 GetToken().type_ = TokenType::PUNCTUATOR_ARROW;
775 Iterator().Forward(1);
776 break;
777 }
778 default: {
779 break;
780 }
781 }
782 }
783
ScanExclamationPunctuator()784 void Lexer::ScanExclamationPunctuator()
785 {
786 GetToken().type_ = TokenType::PUNCTUATOR_EXCLAMATION_MARK;
787
788 switch (Iterator().Peek()) {
789 case LEX_CHAR_EQUALS: {
790 GetToken().type_ = TokenType::PUNCTUATOR_NOT_EQUAL;
791 Iterator().Forward(1);
792
793 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
794 GetToken().type_ = TokenType::PUNCTUATOR_NOT_STRICT_EQUAL;
795 Iterator().Forward(1);
796 }
797 break;
798 }
799 default: {
800 break;
801 }
802 }
803 }
804
ScanAmpersandPunctuator()805 void Lexer::ScanAmpersandPunctuator()
806 {
807 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND;
808
809 switch (Iterator().Peek()) {
810 case LEX_CHAR_AMPERSAND: {
811 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND;
812 Iterator().Forward(1);
813
814 switch (Iterator().Peek()) {
815 case LEX_CHAR_EQUALS: {
816 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND_EQUAL;
817 Iterator().Forward(1);
818 break;
819 }
820 default: {
821 break;
822 }
823 }
824
825 break;
826 }
827 case LEX_CHAR_EQUALS: {
828 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND_EQUAL;
829 Iterator().Forward(1);
830 break;
831 }
832 default: {
833 break;
834 }
835 }
836 }
837
ScanVLinePunctuator()838 void Lexer::ScanVLinePunctuator()
839 {
840 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR;
841
842 switch (Iterator().Peek()) {
843 case LEX_CHAR_VLINE: {
844 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR;
845 Iterator().Forward(1);
846
847 switch (Iterator().Peek()) {
848 case LEX_CHAR_EQUALS: {
849 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR_EQUAL;
850 Iterator().Forward(1);
851 break;
852 }
853 default: {
854 break;
855 }
856 }
857
858 break;
859 }
860 case LEX_CHAR_EQUALS: {
861 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR_EQUAL;
862 Iterator().Forward(1);
863 break;
864 }
865 default: {
866 break;
867 }
868 }
869 }
870
ScanCircumflexPunctuator()871 void Lexer::ScanCircumflexPunctuator()
872 {
873 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR;
874
875 switch (Iterator().Peek()) {
876 case LEX_CHAR_EQUALS: {
877 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR_EQUAL;
878 Iterator().Forward(1);
879 break;
880 }
881 default: {
882 break;
883 }
884 }
885 }
886
ScanPlusPunctuator()887 void Lexer::ScanPlusPunctuator()
888 {
889 GetToken().type_ = TokenType::PUNCTUATOR_PLUS;
890
891 switch (Iterator().Peek()) {
892 case LEX_CHAR_PLUS: {
893 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_PLUS;
894 Iterator().Forward(1);
895 break;
896 }
897 case LEX_CHAR_EQUALS: {
898 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_EQUAL;
899 Iterator().Forward(1);
900 break;
901 }
902 default: {
903 break;
904 }
905 }
906 }
907
ScanMinusPunctuator()908 void Lexer::ScanMinusPunctuator()
909 {
910 GetToken().type_ = TokenType::PUNCTUATOR_MINUS;
911
912 switch (Iterator().Peek()) {
913 case LEX_CHAR_MINUS: {
914 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_MINUS;
915 Iterator().Forward(1);
916 break;
917 }
918 case LEX_CHAR_EQUALS: {
919 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_EQUAL;
920 Iterator().Forward(1);
921 break;
922 }
923 default: {
924 break;
925 }
926 }
927 }
928
ScanSlashPunctuator()929 void Lexer::ScanSlashPunctuator()
930 {
931 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE;
932
933 switch (Iterator().Peek()) {
934 case LEX_CHAR_EQUALS: {
935 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE_EQUAL;
936 Iterator().Forward(1);
937 break;
938 }
939 default: {
940 break;
941 }
942 }
943 }
944
ScanDotPunctuator()945 void Lexer::ScanDotPunctuator()
946 {
947 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD;
948
949 switch (Iterator().Peek()) {
950 case LEX_CHAR_0:
951 case LEX_CHAR_1:
952 case LEX_CHAR_2:
953 case LEX_CHAR_3:
954 case LEX_CHAR_4:
955 case LEX_CHAR_5:
956 case LEX_CHAR_6:
957 case LEX_CHAR_7:
958 case LEX_CHAR_8:
959 case LEX_CHAR_9: {
960 ScanNumber();
961 break;
962 }
963 case LEX_CHAR_QUESTION: {
964 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_QUESTION;
965 Iterator().Forward(1);
966 break;
967 }
968 case LEX_CHAR_DOT: {
969 Iterator().Forward(1);
970
971 if (Iterator().Peek() == LEX_CHAR_DOT) {
972 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_PERIOD_PERIOD;
973 Iterator().Forward(1);
974 break;
975 }
976
977 Iterator().Backward(1);
978 break;
979 }
980 default: {
981 break;
982 }
983 }
984 }
985
ScanAsterixPunctuator()986 void Lexer::ScanAsterixPunctuator()
987 {
988 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY;
989
990 switch (Iterator().Peek()) {
991 case LEX_CHAR_ASTERISK: {
992 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION;
993 Iterator().Forward(1);
994
995 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
996 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION_EQUAL;
997 Iterator().Forward(1);
998 }
999 break;
1000 }
1001 case LEX_CHAR_EQUALS: {
1002 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY_EQUAL;
1003 Iterator().Forward(1);
1004 break;
1005 }
1006 default: {
1007 break;
1008 }
1009 }
1010 }
1011
ScanPercentPunctuator()1012 void Lexer::ScanPercentPunctuator()
1013 {
1014 GetToken().type_ = TokenType::PUNCTUATOR_MOD;
1015
1016 switch (Iterator().Peek()) {
1017 case LEX_CHAR_EQUALS: {
1018 GetToken().type_ = TokenType::PUNCTUATOR_MOD_EQUAL;
1019 Iterator().Forward(1);
1020 break;
1021 }
1022 default: {
1023 break;
1024 }
1025 }
1026 }
1027
IsLineTerminatorOrEos() const1028 bool Lexer::IsLineTerminatorOrEos() const
1029 {
1030 switch (Iterator().PeekCp()) {
1031 case util::StringView::Iterator::INVALID_CP:
1032 case LEX_CHAR_LF:
1033 case LEX_CHAR_CR:
1034 case LEX_CHAR_LS:
1035 case LEX_CHAR_PS: {
1036 return true;
1037 }
1038 default: {
1039 break;
1040 }
1041 }
1042
1043 return false;
1044 }
1045
ScanRegExpPattern()1046 void Lexer::ScanRegExpPattern()
1047 {
1048 bool isCharClass = false;
1049 size_t cpSize {};
1050
1051 while (true) {
1052 switch (Iterator().PeekCp(&cpSize)) {
1053 case util::StringView::Iterator::INVALID_CP:
1054 case LEX_CHAR_LF:
1055 case LEX_CHAR_CR:
1056 case LEX_CHAR_LS:
1057 case LEX_CHAR_PS: {
1058 ThrowError("Unterminated RegExp");
1059 break;
1060 }
1061 case LEX_CHAR_SLASH: {
1062 if (!isCharClass) {
1063 return;
1064 }
1065
1066 break;
1067 }
1068 case LEX_CHAR_LEFT_SQUARE: {
1069 isCharClass = true;
1070 break;
1071 }
1072 case LEX_CHAR_RIGHT_SQUARE: {
1073 isCharClass = false;
1074 break;
1075 }
1076 case LEX_CHAR_BACKSLASH: {
1077 Iterator().Forward(1);
1078
1079 if (IsLineTerminatorOrEos()) {
1080 continue;
1081 }
1082
1083 Iterator().PeekCp(&cpSize);
1084 Iterator().Forward(cpSize);
1085 continue;
1086 }
1087 default: {
1088 break;
1089 }
1090 }
1091
1092 Iterator().Forward(cpSize);
1093 }
1094 }
1095
GetRegExpFlag(char32_t cp,RegExpFlags & flag)1096 bool Lexer::GetRegExpFlag(char32_t cp, RegExpFlags &flag)
1097 {
1098 switch (cp) {
1099 case LEX_CHAR_LOWERCASE_G: {
1100 flag = RegExpFlags::GLOBAL;
1101 break;
1102 }
1103 case LEX_CHAR_LOWERCASE_I: {
1104 flag = RegExpFlags::IGNORE_CASE;
1105 break;
1106 }
1107 case LEX_CHAR_LOWERCASE_M: {
1108 flag = RegExpFlags::MULTILINE;
1109 break;
1110 }
1111 case LEX_CHAR_LOWERCASE_S: {
1112 flag = RegExpFlags::DOTALL;
1113 break;
1114 }
1115 case LEX_CHAR_LOWERCASE_U: {
1116 flag = RegExpFlags::UNICODE;
1117 break;
1118 }
1119 case LEX_CHAR_LOWERCASE_Y: {
1120 flag = RegExpFlags::STICKY;
1121 break;
1122 }
1123 case LEX_CHAR_LOWERCASE_D: {
1124 flag = RegExpFlags::HAS_INDICES;
1125 break;
1126 }
1127 default: {
1128 return false;
1129 }
1130 }
1131 return true;
1132 }
1133
ScanRegExpFlags()1134 RegExpFlags Lexer::ScanRegExpFlags()
1135 {
1136 RegExpFlags resultFlags = RegExpFlags::EMPTY;
1137
1138 while (true) {
1139 size_t cpSize {};
1140 auto cp = Iterator().PeekCp(&cpSize);
1141 if (!KeywordsUtil::IsIdentifierPart(cp)) {
1142 break;
1143 }
1144
1145 Iterator().Forward(cpSize);
1146
1147 RegExpFlags flag = RegExpFlags::EMPTY;
1148
1149 if (!GetRegExpFlag(cp, flag)) {
1150 if (cp == LEX_CHAR_SP) {
1151 return resultFlags;
1152 } else {
1153 ThrowError("Invalid RegExp flag");
1154 }
1155 }
1156
1157 if (flag == RegExpFlags::EMPTY || (resultFlags & flag) != 0) {
1158 ThrowError("Invalid RegExp flag");
1159 }
1160
1161 resultFlags = resultFlags | flag;
1162 }
1163
1164 return resultFlags;
1165 }
1166
ScanRegExp()1167 RegExp Lexer::ScanRegExp()
1168 {
1169 GetToken().type_ = TokenType::LITERAL_REGEXP;
1170
1171 const auto patternStart = Iterator().Index();
1172 ScanRegExpPattern();
1173 const auto pattern = SourceView(patternStart, Iterator().Index());
1174
1175 ASSERT(Iterator().Peek() == LEX_CHAR_SLASH);
1176 Iterator().Forward(1);
1177
1178 const auto flagsStart = Iterator().Index();
1179 RegExpFlags resultFlags = ScanRegExpFlags();
1180 const auto flags = SourceView(flagsStart, Iterator().Index());
1181
1182 SkipWhiteSpaces();
1183 SetTokenEnd();
1184
1185 return {pattern, flags, resultFlags};
1186 }
1187
CheckArrow()1188 bool Lexer::CheckArrow()
1189 {
1190 if (Iterator().Peek() != LEX_CHAR_EQUALS) {
1191 return false;
1192 }
1193 Iterator().Forward(1);
1194
1195 bool res = Iterator().Peek() == LEX_CHAR_GREATER_THAN;
1196 Iterator().Backward(1);
1197
1198 return res;
1199 }
1200
SetTokenStart()1201 void Lexer::SetTokenStart()
1202 {
1203 if (pos_.nextTokenLine != 0) {
1204 pos_.line += pos_.nextTokenLine;
1205 pos_.nextTokenLine = 0;
1206 GetToken().flags_ = TokenFlags::NEW_LINE;
1207 } else {
1208 GetToken().flags_ = TokenFlags::NONE;
1209 }
1210
1211 pos_.token.loc_.start = SourcePosition {Iterator().Index(), pos_.line};
1212 }
1213
SetTokenEnd()1214 void Lexer::SetTokenEnd()
1215 {
1216 pos_.token.loc_.end = SourcePosition {Iterator().Index(), pos_.line};
1217 }
1218
CheckAwaitKeyword()1219 void Lexer::CheckAwaitKeyword()
1220 {
1221 if (parserContext_->IsStaticBlock()) {
1222 ThrowError("'await' is not allowed in class static block");
1223 }
1224 // support top level await for module
1225 if (!parserContext_->IsAsync()) {
1226 if (!parserContext_->IsModule() || parserContext_->GetProgram()->IsDtsFile()) {
1227 GetToken().type_ = TokenType::LITERAL_IDENT;
1228 return;
1229 }
1230 if (parserContext_->GetProgram()->Extension() == ScriptExtension::TS) {
1231 if (parserContext_->IsTsModule()) {
1232 GetToken().type_ = TokenType::LITERAL_IDENT;
1233 return;
1234 }
1235 }
1236 }
1237
1238 if (parserContext_->DisallowAwait()) {
1239 ThrowError("'await' is not allowed");
1240 }
1241 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1242 ThrowError("Keyword must not contain escaped characters");
1243 }
1244 GetToken().type_ = TokenType::KEYW_AWAIT;
1245 }
1246
CheckArgumentsKeyword()1247 void Lexer::CheckArgumentsKeyword()
1248 {
1249 if (parserContext_->DisallowArguments()) {
1250 ThrowError("'arguments' is not allowed in static block and field initializer");
1251 }
1252 }
1253
CheckKeywordEscape(TokenType type)1254 void Lexer::CheckKeywordEscape(TokenType type)
1255 {
1256 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1257 ThrowError("Escape sequences are not allowed in keywords");
1258 }
1259
1260 GetToken().type_ = type;
1261 }
1262
CheckEnumKeyword()1263 void Lexer::CheckEnumKeyword()
1264 {
1265 if (parserContext_->GetProgram()->Extension() == ScriptExtension::JS) {
1266 ThrowError("Unexpected reserved keyword");
1267 }
1268
1269 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1270 ThrowError("Escape sequences are not allowed in keywords");
1271 }
1272
1273 GetToken().type_ = TokenType::LITERAL_IDENT;
1274 }
1275
CheckLetKeyword()1276 void Lexer::CheckLetKeyword()
1277 {
1278 GetToken().type_ = TokenType::KEYW_LET;
1279 }
1280
CheckYieldKeyword()1281 void Lexer::CheckYieldKeyword()
1282 {
1283 if (!parserContext_->AllowYield()) {
1284 ThrowError("'yield' is not allowed");
1285 }
1286
1287 GetToken().type_ = TokenType::KEYW_YIELD;
1288 }
1289
CheckFutureReservedKeyword(TokenType keywordType)1290 void Lexer::CheckFutureReservedKeyword(TokenType keywordType)
1291 {
1292 GetToken().type_ = TokenType::LITERAL_IDENT;
1293
1294 if (parserContext_->GetProgram()->Extension() == ScriptExtension::TS && keywordType <= TokenType::KEYW_INTERFACE) {
1295 return;
1296 }
1297
1298 ThrowError("Unexpected strict mode reserved keyword");
1299 }
1300
SkipWhiteSpaces()1301 void Lexer::SkipWhiteSpaces()
1302 {
1303 while (true) {
1304 auto cp = Iterator().Peek();
1305
1306 switch (cp) {
1307 case LEX_CHAR_HASH_MARK: {
1308 Iterator().Forward(1);
1309 cp = Iterator().Peek();
1310 if (cp != LEX_CHAR_EXCLAMATION) {
1311 Iterator().Backward(1);
1312 return;
1313 }
1314 if (Iterator().Index() != 1) {
1315 /*
1316 * according to ECMA-262 specification item 12.5 Hashbang Comments are location-sensitive.
1317 * only allowed occurs at the beginning of files, other position is illegal.
1318 */
1319 Iterator().Backward(1);
1320 ThrowError("Invalid or unexpected token");
1321 }
1322
1323 Iterator().Forward(1);
1324 SkipSingleLineComment();
1325 continue;
1326 }
1327 case LEX_CHAR_CR: {
1328 Iterator().Forward(1);
1329
1330 if (Iterator().Peek() != LEX_CHAR_LF) {
1331 Iterator().Backward(1);
1332 }
1333
1334 [[fallthrough]];
1335 }
1336 case LEX_CHAR_LF: {
1337 Iterator().Forward(1);
1338 pos_.nextTokenLine++;
1339 continue;
1340 }
1341 case LEX_CHAR_VT:
1342 case LEX_CHAR_FF:
1343 case LEX_CHAR_SP:
1344 case LEX_CHAR_TAB: {
1345 Iterator().Forward(1);
1346 continue;
1347 }
1348 case LEX_CHAR_SLASH: {
1349 Iterator().Forward(1);
1350 cp = Iterator().Peek();
1351 if (cp == LEX_CHAR_SLASH) {
1352 Iterator().Forward(1);
1353 SkipSingleLineComment();
1354 continue;
1355 }
1356 if (cp == LEX_CHAR_ASTERISK) {
1357 Iterator().Forward(1);
1358 SkipMultiLineComment();
1359 continue;
1360 }
1361
1362 Iterator().Backward(1);
1363 return;
1364 }
1365 default: {
1366 if (cp < LEX_ASCII_MAX_BITS) {
1367 return;
1368 }
1369
1370 size_t cpSize {};
1371 cp = Iterator().PeekCp(&cpSize);
1372
1373 switch (cp) {
1374 case LEX_CHAR_LS:
1375 case LEX_CHAR_PS: {
1376 pos_.nextTokenLine++;
1377 [[fallthrough]];
1378 }
1379 case LEX_CHAR_NBSP:
1380 case LEX_CHAR_NLINE:
1381 case LEX_CHAR_IGSP:
1382 case LEX_CHAR_ZWNBSP: {
1383 Iterator().Forward(cpSize);
1384 continue;
1385 }
1386 default: {
1387 return;
1388 }
1389 }
1390 }
1391 }
1392 }
1393 }
1394
1395 // NOLINTNEXTLINE(readability-function-size)
NextToken(LexerNextTokenFlags flags)1396 void Lexer::NextToken(LexerNextTokenFlags flags)
1397 {
1398 Keywords kws(this, flags);
1399 KeywordsUtil &kwu = kws.Util();
1400
1401 SetTokenStart();
1402
1403 auto cp = Iterator().Peek();
1404 Iterator().Forward(1);
1405
1406 GetToken().keywordType_ = TokenType::EOS;
1407
1408 switch (cp) {
1409 case LEX_CHAR_EXCLAMATION: {
1410 ScanExclamationPunctuator();
1411 break;
1412 }
1413 case LEX_CHAR_SINGLE_QUOTE: {
1414 ScanString<LEX_CHAR_SINGLE_QUOTE>();
1415 break;
1416 }
1417 case LEX_CHAR_DOUBLE_QUOTE: {
1418 ScanString<LEX_CHAR_DOUBLE_QUOTE>();
1419 break;
1420 }
1421 case LEX_CHAR_HASH_MARK: {
1422 GetToken().type_ = TokenType::PUNCTUATOR_HASH_MARK;
1423 break;
1424 }
1425 case LEX_CHAR_PERCENT: {
1426 ScanPercentPunctuator();
1427 break;
1428 }
1429 case LEX_CHAR_AMPERSAND: {
1430 ScanAmpersandPunctuator();
1431 break;
1432 }
1433 case LEX_CHAR_LEFT_PAREN: {
1434 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_PARENTHESIS;
1435 break;
1436 }
1437 case LEX_CHAR_RIGHT_PAREN: {
1438 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_PARENTHESIS;
1439 break;
1440 }
1441 case LEX_CHAR_ASTERISK: {
1442 ScanAsterixPunctuator();
1443 break;
1444 }
1445 case LEX_CHAR_PLUS: {
1446 ScanPlusPunctuator();
1447 break;
1448 }
1449 case LEX_CHAR_COMMA: {
1450 GetToken().type_ = TokenType::PUNCTUATOR_COMMA;
1451 break;
1452 }
1453 case LEX_CHAR_MINUS: {
1454 ScanMinusPunctuator();
1455 break;
1456 }
1457 case LEX_CHAR_DOT: {
1458 ScanDotPunctuator();
1459 break;
1460 }
1461 case LEX_CHAR_SLASH: {
1462 ScanSlashPunctuator();
1463 break;
1464 }
1465 case LEX_CHAR_0: {
1466 ScanNumberLeadingZero();
1467 break;
1468 }
1469 case LEX_CHAR_1:
1470 case LEX_CHAR_2:
1471 case LEX_CHAR_3:
1472 case LEX_CHAR_4:
1473 case LEX_CHAR_5:
1474 case LEX_CHAR_6:
1475 case LEX_CHAR_7:
1476 case LEX_CHAR_8:
1477 case LEX_CHAR_9: {
1478 ScanNumber();
1479 break;
1480 }
1481 case LEX_CHAR_COLON: {
1482 GetToken().type_ = TokenType::PUNCTUATOR_COLON;
1483 break;
1484 }
1485 case LEX_CHAR_SEMICOLON: {
1486 GetToken().type_ = TokenType::PUNCTUATOR_SEMI_COLON;
1487 break;
1488 }
1489 case LEX_CHAR_LESS_THAN: {
1490 ScanLessThanPunctuator();
1491 break;
1492 }
1493 case LEX_CHAR_EQUALS: {
1494 ScanEqualsPunctuator();
1495 break;
1496 }
1497 case LEX_CHAR_GREATER_THAN: {
1498 ScanGreaterThanPunctuator();
1499 break;
1500 }
1501 case LEX_CHAR_QUESTION: {
1502 ScanQuestionPunctuator();
1503 break;
1504 }
1505 case LEX_CHAR_AT: {
1506 GetToken().type_ = TokenType::PUNCTUATOR_AT;
1507 break;
1508 }
1509 case LEX_CHAR_DOLLAR_SIGN:
1510 case LEX_CHAR_UPPERCASE_A:
1511 case LEX_CHAR_UPPERCASE_B:
1512 case LEX_CHAR_UPPERCASE_C:
1513 case LEX_CHAR_UPPERCASE_D:
1514 case LEX_CHAR_UPPERCASE_E:
1515 case LEX_CHAR_UPPERCASE_F:
1516 case LEX_CHAR_UPPERCASE_G:
1517 case LEX_CHAR_UPPERCASE_H:
1518 case LEX_CHAR_UPPERCASE_I:
1519 case LEX_CHAR_UPPERCASE_J:
1520 case LEX_CHAR_UPPERCASE_K:
1521 case LEX_CHAR_UPPERCASE_L:
1522 case LEX_CHAR_UPPERCASE_M:
1523 case LEX_CHAR_UPPERCASE_N:
1524 case LEX_CHAR_UPPERCASE_O:
1525 case LEX_CHAR_UPPERCASE_P:
1526 case LEX_CHAR_UPPERCASE_Q:
1527 case LEX_CHAR_UPPERCASE_R:
1528 case LEX_CHAR_UPPERCASE_S:
1529 case LEX_CHAR_UPPERCASE_T:
1530 case LEX_CHAR_UPPERCASE_U:
1531 case LEX_CHAR_UPPERCASE_V:
1532 case LEX_CHAR_UPPERCASE_W:
1533 case LEX_CHAR_UPPERCASE_X:
1534 case LEX_CHAR_UPPERCASE_Y:
1535 case LEX_CHAR_UPPERCASE_Z:
1536 case LEX_CHAR_UNDERSCORE:
1537 case LEX_CHAR_LOWERCASE_H:
1538 case LEX_CHAR_LOWERCASE_J:
1539 case LEX_CHAR_LOWERCASE_Q:
1540 case LEX_CHAR_LOWERCASE_X:
1541 case LEX_CHAR_LOWERCASE_Z: {
1542 kwu.ScanIdContinue();
1543 break;
1544 }
1545 case LEX_CHAR_LEFT_SQUARE: {
1546 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SQUARE_BRACKET;
1547 break;
1548 }
1549 case LEX_CHAR_BACKSLASH: {
1550 GetToken().flags_ |= TokenFlags::HAS_ESCAPE;
1551
1552 if (Iterator().Peek() != LEX_CHAR_LOWERCASE_U) {
1553 ThrowError("Invalid character");
1554 }
1555
1556 cp = ScanUnicodeEscapeSequence();
1557
1558 kwu.ScanIdentifierStart(cp);
1559 break;
1560 }
1561 case LEX_CHAR_RIGHT_SQUARE: {
1562 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SQUARE_BRACKET;
1563 break;
1564 }
1565 case LEX_CHAR_CIRCUMFLEX: {
1566 ScanCircumflexPunctuator();
1567 break;
1568 }
1569 case LEX_CHAR_BACK_TICK: {
1570 GetToken().type_ = TokenType::PUNCTUATOR_BACK_TICK;
1571 SetTokenEnd();
1572 return;
1573 }
1574 case LEX_CHAR_LOWERCASE_A: {
1575 kws.ScanA();
1576 break;
1577 }
1578 case LEX_CHAR_LOWERCASE_B: {
1579 kws.ScanB();
1580 break;
1581 }
1582 case LEX_CHAR_LOWERCASE_C: {
1583 kws.ScanC();
1584 break;
1585 }
1586 case LEX_CHAR_LOWERCASE_D: {
1587 kws.ScanD();
1588 break;
1589 }
1590 case LEX_CHAR_LOWERCASE_E: {
1591 kws.ScanE();
1592 break;
1593 }
1594 case LEX_CHAR_LOWERCASE_F: {
1595 kws.ScanF();
1596 break;
1597 }
1598 case LEX_CHAR_LOWERCASE_G: {
1599 kws.ScanG();
1600 break;
1601 }
1602 case LEX_CHAR_LOWERCASE_I: {
1603 kws.ScanI();
1604 break;
1605 }
1606 case LEX_CHAR_LOWERCASE_K: {
1607 kws.ScanK();
1608 break;
1609 }
1610 case LEX_CHAR_LOWERCASE_L: {
1611 kws.ScanL();
1612 break;
1613 }
1614 case LEX_CHAR_LOWERCASE_M: {
1615 kws.ScanM();
1616 break;
1617 }
1618 case LEX_CHAR_LOWERCASE_N: {
1619 kws.ScanN();
1620 break;
1621 }
1622 case LEX_CHAR_LOWERCASE_O: {
1623 kws.ScanO();
1624 break;
1625 }
1626 case LEX_CHAR_LOWERCASE_P: {
1627 kws.ScanP();
1628 break;
1629 }
1630 case LEX_CHAR_LOWERCASE_R: {
1631 kws.ScanR();
1632 break;
1633 }
1634 case LEX_CHAR_LOWERCASE_S: {
1635 kws.ScanS();
1636 break;
1637 }
1638 case LEX_CHAR_LOWERCASE_T: {
1639 kws.ScanT();
1640 break;
1641 }
1642 case LEX_CHAR_LOWERCASE_U: {
1643 kws.ScanU();
1644 break;
1645 }
1646 case LEX_CHAR_LOWERCASE_V: {
1647 kws.ScanV();
1648 break;
1649 }
1650 case LEX_CHAR_LOWERCASE_W: {
1651 kws.ScanW();
1652 break;
1653 }
1654 case LEX_CHAR_LOWERCASE_Y: {
1655 kws.ScanY();
1656 break;
1657 }
1658 case LEX_CHAR_LEFT_BRACE: {
1659 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_BRACE;
1660
1661 if (tlCtx_) {
1662 tlCtx_->ConsumeLeftBrace();
1663 }
1664
1665 break;
1666 }
1667 case LEX_CHAR_VLINE: {
1668 ScanVLinePunctuator();
1669 break;
1670 }
1671 case LEX_CHAR_RIGHT_BRACE: {
1672 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_BRACE;
1673
1674 if (tlCtx_ && tlCtx_->ConsumeRightBrace()) {
1675 SetTokenEnd();
1676 return;
1677 }
1678
1679 break;
1680 }
1681 case LEX_CHAR_TILDE: {
1682 GetToken().type_ = TokenType::PUNCTUATOR_TILDE;
1683 break;
1684 }
1685 default: {
1686 Iterator().Backward(1);
1687
1688 if (cp == util::StringView::Iterator::INVALID_CP) {
1689 GetToken().type_ = TokenType::EOS;
1690 break;
1691 }
1692
1693 cp = Iterator().Next();
1694 kwu.ScanIdentifierStart(cp);
1695 break;
1696 }
1697 }
1698
1699 SetTokenEnd();
1700 SkipWhiteSpaces();
1701 }
1702
AssignTokenEscapeError()1703 void Lexer::AssignTokenEscapeError()
1704 {
1705 GetToken().flags_ |= TokenFlags::ESCAPE_ERROR;
1706 }
1707
AssignTokenTaggedTemplate()1708 void Lexer::AssignTokenTaggedTemplate()
1709 {
1710 GetToken().flags_ |= TokenFlags::TAGGED_TEMPLATE;
1711 }
1712
CheckTokenIsTaggedTemplate() const1713 bool Lexer::CheckTokenIsTaggedTemplate() const
1714 {
1715 return GetToken().IsTaggedTemplate();
1716 }
1717
1718 } // namespace panda::es2panda::lexer
1719