1 /**
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "lexer.h"
17
18 #include <es2panda.h>
19 #include <gen/keywords.h>
20 #include <lexer/token/letters.h>
21 #include <lexer/token/tokenType.h>
22 #include <parser/context/parserContext.h>
23
24 #include <array>
25
26 namespace panda::es2panda::lexer {
27
LexerPosition(const util::StringView & source)28 LexerPosition::LexerPosition(const util::StringView &source) : iterator(source) {}
29
Lexer(const parser::ParserContext * parserContext)30 Lexer::Lexer(const parser::ParserContext *parserContext)
31 : allocator_(parserContext->GetProgram()->Allocator()),
32 parserContext_(parserContext),
33 source_(parserContext->GetProgram()->SourceCode()),
34 pos_(source_)
35 {
36 SkipWhiteSpaces();
37 }
38
ScanUnicodeEscapeSequence()39 char32_t Lexer::ScanUnicodeEscapeSequence()
40 {
41 ASSERT(Iterator().Peek() == LEX_CHAR_LOWERCASE_U);
42
43 Iterator().Forward(1);
44
45 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
46 Iterator().Forward(1);
47 return ScanUnicodeCodePointEscape();
48 }
49
50 // 4: a template parameter to the expected fixed length when scanning Unicode escape sequences
51 return ScanHexEscape<4>();
52 }
53
ScanUnicodeCodePointEscape()54 char32_t Lexer::ScanUnicodeCodePointEscape()
55 {
56 double code = 0;
57 char32_t cp {};
58
59 while (true) {
60 cp = Iterator().Peek();
61 if (!IsHexDigit(cp)) {
62 break;
63 }
64
65 Iterator().Forward(1);
66
67 constexpr auto multiplier = 16;
68 code = code * multiplier + HexValue(cp);
69 if (code > UNICODE_CODE_POINT_MAX) {
70 if (CheckTokenIsTaggedTemplate()) {
71 AssignTokenEscapeError();
72 break;
73 }
74 ThrowError("Invalid unicode escape sequence");
75 }
76 }
77 if (cp != LEX_CHAR_RIGHT_BRACE) {
78 if (CheckTokenIsTaggedTemplate()) {
79 AssignTokenEscapeError();
80 return static_cast<char32_t>(code);
81 } else {
82 ThrowError("Invalid unicode escape sequence");
83 }
84 }
85
86 Iterator().Forward(1);
87 return static_cast<char32_t>(code);
88 }
89
Allocator()90 ArenaAllocator *Lexer::Allocator()
91 {
92 return allocator_;
93 }
94
GetToken()95 Token &Lexer::GetToken()
96 {
97 return pos_.token;
98 }
99
GetToken() const100 const Token &Lexer::GetToken() const
101 {
102 return pos_.token;
103 }
104
Line() const105 size_t Lexer::Line() const
106 {
107 return pos_.line;
108 }
109
Save() const110 LexerPosition Lexer::Save() const
111 {
112 return pos_;
113 }
114
BackwardToken(TokenType type,size_t offset)115 void Lexer::BackwardToken(TokenType type, size_t offset)
116 {
117 pos_.token.type_ = type;
118 pos_.iterator.Reset(GetToken().End().index - offset);
119 pos_.nextTokenLine = 0;
120 }
121
ForwardToken(TokenType type,size_t offset)122 void Lexer::ForwardToken(TokenType type, size_t offset)
123 {
124 SetTokenStart();
125 pos_.token.type_ = type;
126 pos_.iterator.Forward(offset);
127 SetTokenEnd();
128 SkipWhiteSpaces();
129 }
130
Rewind(const LexerPosition & pos)131 void Lexer::Rewind(const LexerPosition &pos)
132 {
133 pos_ = pos;
134 }
135
Lookahead()136 char32_t Lexer::Lookahead()
137 {
138 return Iterator().Peek();
139 }
140
// Returns the view of the source delimited by the indices of two iterators; forwards to the
// index-based overload.
util::StringView Lexer::SourceView(const util::StringView::Iterator &begin, const util::StringView::Iterator &end) const
{
    const auto from = begin.Index();
    const auto to = end.Index();
    return SourceView(from, to);
}
145
SourceView(size_t begin,size_t end) const146 util::StringView Lexer::SourceView(size_t begin, size_t end) const
147 {
148 return source_.Substr(begin, end);
149 }
150
SkipMultiLineComment()151 void Lexer::SkipMultiLineComment()
152 {
153 while (true) {
154 switch (Iterator().Next()) {
155 case util::StringView::Iterator::INVALID_CP: {
156 ThrowError("Unterminated multi-line comment");
157 break;
158 }
159 case LEX_CHAR_CR: {
160 if (Iterator().Peek() == LEX_CHAR_LF) {
161 Iterator().Forward(1);
162 }
163
164 [[fallthrough]];
165 }
166 case LEX_CHAR_LF:
167 case LEX_CHAR_LS:
168 case LEX_CHAR_PS: {
169 pos_.nextTokenLine++;
170 continue;
171 }
172 case LEX_CHAR_ASTERISK: {
173 if (Iterator().Peek() == LEX_CHAR_SLASH) {
174 Iterator().Forward(1);
175 return;
176 }
177
178 break;
179 }
180 default: {
181 break;
182 }
183 }
184 }
185 }
186
187 /* New line character is not processed */
SkipSingleLineComment()188 void Lexer::SkipSingleLineComment()
189 {
190 while (true) {
191 switch (Iterator().Next()) {
192 case util::StringView::Iterator::INVALID_CP:
193 case LEX_CHAR_CR: {
194 if (Iterator().Peek() == LEX_CHAR_LF) {
195 Iterator().Forward(1);
196 }
197
198 [[fallthrough]];
199 }
200 case LEX_CHAR_LF:
201 case LEX_CHAR_LS:
202 case LEX_CHAR_PS: {
203 pos_.nextTokenLine++;
204 return;
205 }
206 default: {
207 break;
208 }
209 }
210 }
211 }
212
ThrowError(std::string_view message)213 void Lexer::ThrowError(std::string_view message)
214 {
215 lexer::LineIndex lineIndex = parserContext_->GetProgram()->GetLineIndex();
216 SourceLocation loc = lineIndex.GetLocation(SourcePosition(Iterator().Index(), pos_.line + pos_.nextTokenLine));
217 throw es2panda::Error(es2panda::ErrorType::SYNTAX, message, loc.line, loc.col);
218 }
219
CheckNumberLiteralEnd()220 void Lexer::CheckNumberLiteralEnd()
221 {
222 if (Iterator().Peek() == LEX_CHAR_LOWERCASE_N) {
223 Iterator().Forward(1);
224 GetToken().flags_ |= TokenFlags::NUMBER_BIGINT;
225 }
226
227 GetToken().src_ = SourceView(GetToken().Start().index, Iterator().Index());
228 const auto nextCp = Iterator().PeekCp();
229 if (KeywordsUtil::IsIdentifierStart(nextCp) || IsDecimalDigit(nextCp)) {
230 ThrowError("Invalid numeric literal");
231 }
232 }
233
ScanNumberLeadingZero()234 void Lexer::ScanNumberLeadingZero()
235 {
236 GetToken().type_ = TokenType::LITERAL_NUMBER;
237
238 switch (Iterator().Peek()) {
239 case LEX_CHAR_LOWERCASE_X:
240 case LEX_CHAR_UPPERCASE_X: {
241 Iterator().Forward(1);
242 constexpr auto RADIX = 16;
243 ScanNumberRadix<IsHexDigit, RADIX>();
244 CheckNumberLiteralEnd();
245 return;
246 }
247 case LEX_CHAR_LOWERCASE_B:
248 case LEX_CHAR_UPPERCASE_B: {
249 Iterator().Forward(1);
250 constexpr auto RADIX = 2;
251 ScanNumberRadix<IsBinaryDigit, RADIX>();
252 CheckNumberLiteralEnd();
253 return;
254 }
255 case LEX_CHAR_LOWERCASE_O:
256 case LEX_CHAR_UPPERCASE_O: {
257 Iterator().Forward(1);
258 constexpr auto RADIX = 8;
259 ScanNumberRadix<IsOctalDigit, RADIX>();
260
261 switch (Iterator().Peek()) {
262 case LEX_CHAR_8:
263 case LEX_CHAR_9: {
264 ThrowError("Invalid octal digit");
265 }
266 default: {
267 break;
268 }
269 }
270
271 CheckNumberLiteralEnd();
272 return;
273 }
274 case LEX_CHAR_0:
275 case LEX_CHAR_1:
276 case LEX_CHAR_2:
277 case LEX_CHAR_3:
278 case LEX_CHAR_4:
279 case LEX_CHAR_5:
280 case LEX_CHAR_6:
281 case LEX_CHAR_7: {
282 ThrowError("Implicit octal literal not allowed");
283 break;
284 }
285 case LEX_CHAR_8:
286 case LEX_CHAR_9: {
287 ThrowError("NonOctalDecimalIntegerLiteral is not enabled in strict mode code");
288 break;
289 }
290 default: {
291 break;
292 }
293 }
294
295 ScanNumber(Iterator().Peek() == LEX_CHAR_0);
296 }
297
ScanDecimalNumbers(bool allowNumericSeparator)298 void Lexer::ScanDecimalNumbers(bool allowNumericSeparator)
299 {
300 bool allowNumericOnNext = true;
301
302 while (true) {
303 switch (Iterator().Peek()) {
304 case LEX_CHAR_0:
305 case LEX_CHAR_1:
306 case LEX_CHAR_2:
307 case LEX_CHAR_3:
308 case LEX_CHAR_4:
309 case LEX_CHAR_5:
310 case LEX_CHAR_6:
311 case LEX_CHAR_7:
312 case LEX_CHAR_8:
313 case LEX_CHAR_9: {
314 Iterator().Forward(1);
315 allowNumericOnNext = true;
316 break;
317 }
318 case LEX_CHAR_UNDERSCORE: {
319 Iterator().Backward(1);
320 isUnderscore_ = true;
321
322 if (Iterator().Peek() == LEX_CHAR_DOT || !allowNumericSeparator || !allowNumericOnNext) {
323 Iterator().Forward(1);
324 ThrowError("Invalid numeric separator");
325 }
326
327 GetToken().flags_ |= TokenFlags::NUMBER_HAS_UNDERSCORE;
328 Iterator().Forward(2);
329 allowNumericOnNext = false;
330 break;
331 }
332 default: {
333 if (!allowNumericOnNext) {
334 ThrowError("Numeric separators are not allowed at the end of numeric literals");
335 }
336 return;
337 }
338 }
339 }
340 }
341
ConvertNumber(size_t exponentSignPos)342 void Lexer::ConvertNumber(size_t exponentSignPos)
343 {
344 util::StringView sv = SourceView(GetToken().Start().index, Iterator().Index());
345 std::string utf8 = std::string {sv.Utf8()};
346 bool needConversion = false;
347
348 if (exponentSignPos != std::numeric_limits<size_t>::max()) {
349 utf8.insert(exponentSignPos, 1, '+');
350 needConversion = true;
351 }
352
353 if (GetToken().flags_ & TokenFlags::NUMBER_HAS_UNDERSCORE) {
354 utf8.erase(std::remove(utf8.begin(), utf8.end(), LEX_CHAR_UNDERSCORE), utf8.end());
355 needConversion = true;
356 }
357
358 if (needConversion) {
359 util::UString converted(utf8, Allocator());
360 GetToken().src_ = converted.View();
361 } else {
362 GetToken().src_ = sv;
363 }
364
365 try {
366 GetToken().number_ = static_cast<double>(std::stold(utf8, nullptr));
367 } catch (const std::invalid_argument &) {
368 ThrowError("Invalid number");
369 } catch (const std::out_of_range &) {
370 // TODO(frobert): look for a more elegant solution to this
371 GetToken().number_ = std::numeric_limits<double>::infinity();
372 }
373 }
ScanNumber(bool allowNumericSeparator,bool allowBigInt)374 void Lexer::ScanNumber(bool allowNumericSeparator, bool allowBigInt)
375 {
376 GetToken().type_ = TokenType::LITERAL_NUMBER;
377
378 ScanDecimalNumbers(allowNumericSeparator);
379
380 size_t exponentSignPos = std::numeric_limits<size_t>::max();
381 bool parseExponent = true;
382
383 if (Iterator().Peek() == LEX_CHAR_DOT) {
384 allowBigInt = false;
385 Iterator().Forward(1);
386
387 auto cp = Iterator().Peek();
388 if (IsDecimalDigit(cp) || cp == LEX_CHAR_LOWERCASE_E || LEX_CHAR_UPPERCASE_E) {
389 ScanDecimalNumbers(allowNumericSeparator);
390 } else {
391 parseExponent = false;
392 }
393 }
394
395 switch (Iterator().Peek()) {
396 case LEX_CHAR_LOWERCASE_E:
397 case LEX_CHAR_UPPERCASE_E: {
398 allowBigInt = false;
399
400 if (!parseExponent) {
401 break;
402 }
403
404 Iterator().Forward(1);
405
406 switch (Iterator().Peek()) {
407 case LEX_CHAR_UNDERSCORE: {
408 break;
409 }
410 case LEX_CHAR_PLUS:
411 case LEX_CHAR_MINUS: {
412 Iterator().Forward(1);
413 break;
414 }
415 default: {
416 exponentSignPos = Iterator().Index() - GetToken().Start().index;
417 break;
418 }
419 }
420
421 if (!IsDecimalDigit(Iterator().Peek())) {
422 ThrowError("Invalid numeric literal");
423 }
424 ScanDecimalNumbers(allowNumericSeparator);
425 break;
426 }
427 default: {
428 break;
429 }
430 }
431
432 CheckNumberLiteralEnd();
433
434 if (GetToken().flags_ & TokenFlags::NUMBER_BIGINT) {
435 if (!allowBigInt) {
436 ThrowError("Invalid BigInt number");
437 }
438 if (isUnderscore_) {
439 ConvertNumber(exponentSignPos);
440 isUnderscore_ = false;
441 }
442
443 return;
444 }
445
446 ConvertNumber(exponentSignPos);
447 }
448
PushTemplateContext(TemplateLiteralParserContext * ctx)449 void Lexer::PushTemplateContext(TemplateLiteralParserContext *ctx)
450 {
451 tlCtx_ = ctx;
452 }
453
ScanTemplateStringEnd()454 void Lexer::ScanTemplateStringEnd()
455 {
456 ASSERT(Iterator().Peek() == LEX_CHAR_BACK_TICK);
457 Iterator().Forward(1);
458 SetTokenEnd();
459 SkipWhiteSpaces();
460 }
461
ScanTemplateString()462 LexerTemplateString Lexer::ScanTemplateString()
463 {
464 LexerTemplateString templateStr(Allocator());
465 size_t cpSize = 0;
466
467 while (true) {
468 char32_t cp = Iterator().PeekCp(&cpSize);
469
470 switch (cp) {
471 case util::StringView::Iterator::INVALID_CP: {
472 ThrowError("Unexpected token, expected '${' or '`'");
473 break;
474 }
475 case LEX_CHAR_BACK_TICK: {
476 templateStr.end = Iterator().Index();
477 return templateStr;
478 }
479 case LEX_CHAR_CR: {
480 Iterator().Forward(1);
481
482 if (Iterator().Peek() != LEX_CHAR_LF) {
483 Iterator().Backward(1);
484 }
485
486 [[fallthrough]];
487 }
488 case LEX_CHAR_LF: {
489 pos_.line++;
490 templateStr.str.Append(LEX_CHAR_LF);
491 Iterator().Forward(1);
492 continue;
493 }
494 case LEX_CHAR_BACKSLASH: {
495 Iterator().Forward(1);
496
497 char32_t nextCp = Iterator().Peek();
498 if (nextCp == LEX_CHAR_BACK_TICK || nextCp == LEX_CHAR_BACKSLASH || nextCp == LEX_CHAR_DOLLAR_SIGN) {
499 templateStr.str.Append(cp);
500 templateStr.str.Append(nextCp);
501 Iterator().Forward(1);
502 continue;
503 }
504
505 Iterator().Backward(1);
506 break;
507 }
508 case LEX_CHAR_DOLLAR_SIGN: {
509 templateStr.end = Iterator().Index();
510 Iterator().Forward(1);
511
512 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
513 Iterator().Forward(1);
514 templateStr.scanExpression = true;
515 SkipWhiteSpaces();
516 return templateStr;
517 }
518
519 templateStr.str.Append(cp);
520 continue;
521 }
522 default: {
523 break;
524 }
525 }
526
527 templateStr.str.Append(cp);
528 Iterator().Forward(cpSize);
529 }
530
531 UNREACHABLE();
532 return templateStr;
533 }
534
ResetTokenEnd()535 void Lexer::ResetTokenEnd()
536 {
537 SetTokenStart();
538 pos_.iterator.Reset(GetToken().End().index);
539 pos_.line = GetToken().End().line;
540 pos_.nextTokenLine = 0;
541 }
542
ScanStringUnicodePart(util::UString * str)543 void Lexer::ScanStringUnicodePart(util::UString *str)
544 {
545 size_t cpSize {};
546 char32_t cp = Iterator().PeekCp(&cpSize);
547
548 switch (cp) {
549 case util::StringView::Iterator::INVALID_CP: {
550 ThrowError("Unterminated string");
551 break;
552 }
553 case LEX_CHAR_CR: {
554 Iterator().Forward(1);
555 if (Iterator().Peek() != LEX_CHAR_LF) {
556 Iterator().Backward(1);
557 }
558
559 [[fallthrough]];
560 }
561 case LEX_CHAR_LS:
562 case LEX_CHAR_PS:
563 case LEX_CHAR_LF: {
564 pos_.line++;
565 Iterator().Forward(cpSize);
566 return;
567 }
568 case LEX_CHAR_LOWERCASE_B: {
569 cp = LEX_CHAR_BS;
570 break;
571 }
572 case LEX_CHAR_LOWERCASE_T: {
573 cp = LEX_CHAR_TAB;
574 break;
575 }
576 case LEX_CHAR_LOWERCASE_N: {
577 cp = LEX_CHAR_LF;
578 break;
579 }
580 case LEX_CHAR_LOWERCASE_V: {
581 cp = LEX_CHAR_VT;
582 break;
583 }
584 case LEX_CHAR_LOWERCASE_F: {
585 cp = LEX_CHAR_FF;
586 break;
587 }
588 case LEX_CHAR_LOWERCASE_R: {
589 cp = LEX_CHAR_CR;
590 break;
591 }
592 case LEX_CHAR_LOWERCASE_X: {
593 Iterator().Forward(1);
594 // 2: a template parameter to the expected fixed length when scanning Unicode escape sequences
595 str->Append(ScanHexEscape<2>());
596 return;
597 }
598 case LEX_CHAR_LOWERCASE_U: {
599 cp = ScanUnicodeEscapeSequence();
600 str->Append(cp);
601 return;
602 }
603 case LEX_CHAR_0: {
604 Iterator().Forward(1);
605 bool isDecimal = IsDecimalDigit(Iterator().Peek());
606 bool isOctal = IsOctalDigit(Iterator().Peek());
607 Iterator().Backward(1);
608
609 if (!isDecimal) {
610 cp = LEX_CHAR_NULL;
611 break;
612 }
613
614 if (isOctal) {
615 if (CheckTokenIsTaggedTemplate()) {
616 AssignTokenEscapeError();
617 break;
618 }
619 ThrowError("Octal escape sequences are not allowed in strict mode");
620 }
621
622 [[fallthrough]];
623 }
624 default: {
625 if (IsDecimalDigit(Iterator().Peek())) {
626 if (CheckTokenIsTaggedTemplate()) {
627 AssignTokenEscapeError();
628 break;
629 }
630 ThrowError("Invalid character escape sequence in strict mode");
631 }
632
633 break;
634 }
635 }
636
637 Iterator().Forward(cpSize);
638 str->Append(cp);
639 }
640
ScanQuestionPunctuator()641 void Lexer::ScanQuestionPunctuator()
642 {
643 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_MARK;
644
645 switch (Iterator().Peek()) {
646 case LEX_CHAR_QUESTION: {
647 GetToken().type_ = TokenType::PUNCTUATOR_NULLISH_COALESCING;
648 Iterator().Forward(1);
649
650 switch (Iterator().Peek()) {
651 case LEX_CHAR_EQUALS: {
652 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_NULLISH_EQUAL;
653 Iterator().Forward(1);
654 break;
655 }
656 default: {
657 break;
658 }
659 }
660
661 break;
662 }
663 case LEX_CHAR_DOT: {
664 Iterator().Forward(1);
665
666 if (!IsDecimalDigit(Iterator().Peek())) {
667 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_DOT;
668 return;
669 }
670
671 Iterator().Backward(1);
672 break;
673 }
674 default: {
675 break;
676 }
677 }
678 }
679
ScanLessThanPunctuator()680 void Lexer::ScanLessThanPunctuator()
681 {
682 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN;
683
684 switch (Iterator().Peek()) {
685 case LEX_CHAR_LESS_THAN: {
686 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT;
687 Iterator().Forward(1);
688
689 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
690 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT_EQUAL;
691 Iterator().Forward(1);
692 }
693 break;
694 }
695 case LEX_CHAR_EQUALS: {
696 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN_EQUAL;
697 Iterator().Forward(1);
698 break;
699 }
700 default: {
701 break;
702 }
703 }
704 }
705
ScanGreaterThanPunctuator()706 void Lexer::ScanGreaterThanPunctuator()
707 {
708 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN;
709
710 switch (Iterator().Peek()) {
711 case LEX_CHAR_GREATER_THAN: {
712 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT;
713 Iterator().Forward(1);
714
715 switch (Iterator().Peek()) {
716 case LEX_CHAR_GREATER_THAN: {
717 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT;
718 Iterator().Forward(1);
719
720 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
721 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT_EQUAL;
722 Iterator().Forward(1);
723 }
724 break;
725 }
726 case LEX_CHAR_EQUALS: {
727 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT_EQUAL;
728 Iterator().Forward(1);
729 break;
730 }
731 default: {
732 break;
733 }
734 }
735 break;
736 }
737 case LEX_CHAR_EQUALS: {
738 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN_EQUAL;
739 Iterator().Forward(1);
740 break;
741 }
742 default: {
743 break;
744 }
745 }
746 }
747
ScanEqualsPunctuator()748 void Lexer::ScanEqualsPunctuator()
749 {
750 GetToken().type_ = TokenType::PUNCTUATOR_SUBSTITUTION;
751
752 switch (Iterator().Peek()) {
753 case LEX_CHAR_EQUALS: {
754 GetToken().type_ = TokenType::PUNCTUATOR_EQUAL;
755 Iterator().Forward(1);
756
757 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
758 GetToken().type_ = TokenType::PUNCTUATOR_STRICT_EQUAL;
759 Iterator().Forward(1);
760 }
761 break;
762 }
763 case LEX_CHAR_GREATER_THAN: {
764 GetToken().type_ = TokenType::PUNCTUATOR_ARROW;
765 Iterator().Forward(1);
766 break;
767 }
768 default: {
769 break;
770 }
771 }
772 }
773
ScanExclamationPunctuator()774 void Lexer::ScanExclamationPunctuator()
775 {
776 GetToken().type_ = TokenType::PUNCTUATOR_EXCLAMATION_MARK;
777
778 switch (Iterator().Peek()) {
779 case LEX_CHAR_EQUALS: {
780 GetToken().type_ = TokenType::PUNCTUATOR_NOT_EQUAL;
781 Iterator().Forward(1);
782
783 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
784 GetToken().type_ = TokenType::PUNCTUATOR_NOT_STRICT_EQUAL;
785 Iterator().Forward(1);
786 }
787 break;
788 }
789 default: {
790 break;
791 }
792 }
793 }
794
ScanAmpersandPunctuator()795 void Lexer::ScanAmpersandPunctuator()
796 {
797 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND;
798
799 switch (Iterator().Peek()) {
800 case LEX_CHAR_AMPERSAND: {
801 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND;
802 Iterator().Forward(1);
803
804 switch (Iterator().Peek()) {
805 case LEX_CHAR_EQUALS: {
806 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND_EQUAL;
807 Iterator().Forward(1);
808 break;
809 }
810 default: {
811 break;
812 }
813 }
814
815 break;
816 }
817 case LEX_CHAR_EQUALS: {
818 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND_EQUAL;
819 Iterator().Forward(1);
820 break;
821 }
822 default: {
823 break;
824 }
825 }
826 }
827
ScanVLinePunctuator()828 void Lexer::ScanVLinePunctuator()
829 {
830 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR;
831
832 switch (Iterator().Peek()) {
833 case LEX_CHAR_VLINE: {
834 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR;
835 Iterator().Forward(1);
836
837 switch (Iterator().Peek()) {
838 case LEX_CHAR_EQUALS: {
839 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR_EQUAL;
840 Iterator().Forward(1);
841 break;
842 }
843 default: {
844 break;
845 }
846 }
847
848 break;
849 }
850 case LEX_CHAR_EQUALS: {
851 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR_EQUAL;
852 Iterator().Forward(1);
853 break;
854 }
855 default: {
856 break;
857 }
858 }
859 }
860
ScanCircumflexPunctuator()861 void Lexer::ScanCircumflexPunctuator()
862 {
863 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR;
864
865 switch (Iterator().Peek()) {
866 case LEX_CHAR_EQUALS: {
867 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR_EQUAL;
868 Iterator().Forward(1);
869 break;
870 }
871 default: {
872 break;
873 }
874 }
875 }
876
ScanPlusPunctuator()877 void Lexer::ScanPlusPunctuator()
878 {
879 GetToken().type_ = TokenType::PUNCTUATOR_PLUS;
880
881 switch (Iterator().Peek()) {
882 case LEX_CHAR_PLUS: {
883 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_PLUS;
884 Iterator().Forward(1);
885 break;
886 }
887 case LEX_CHAR_EQUALS: {
888 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_EQUAL;
889 Iterator().Forward(1);
890 break;
891 }
892 default: {
893 break;
894 }
895 }
896 }
897
ScanMinusPunctuator()898 void Lexer::ScanMinusPunctuator()
899 {
900 GetToken().type_ = TokenType::PUNCTUATOR_MINUS;
901
902 switch (Iterator().Peek()) {
903 case LEX_CHAR_MINUS: {
904 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_MINUS;
905 Iterator().Forward(1);
906 break;
907 }
908 case LEX_CHAR_EQUALS: {
909 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_EQUAL;
910 Iterator().Forward(1);
911 break;
912 }
913 default: {
914 break;
915 }
916 }
917 }
918
ScanSlashPunctuator()919 void Lexer::ScanSlashPunctuator()
920 {
921 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE;
922
923 switch (Iterator().Peek()) {
924 case LEX_CHAR_EQUALS: {
925 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE_EQUAL;
926 Iterator().Forward(1);
927 break;
928 }
929 default: {
930 break;
931 }
932 }
933 }
934
ScanDotPunctuator()935 void Lexer::ScanDotPunctuator()
936 {
937 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD;
938
939 switch (Iterator().Peek()) {
940 case LEX_CHAR_0:
941 case LEX_CHAR_1:
942 case LEX_CHAR_2:
943 case LEX_CHAR_3:
944 case LEX_CHAR_4:
945 case LEX_CHAR_5:
946 case LEX_CHAR_6:
947 case LEX_CHAR_7:
948 case LEX_CHAR_8:
949 case LEX_CHAR_9: {
950 ScanNumber();
951 break;
952 }
953 case LEX_CHAR_QUESTION: {
954 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_QUESTION;
955 Iterator().Forward(1);
956 break;
957 }
958 case LEX_CHAR_DOT: {
959 Iterator().Forward(1);
960
961 if (Iterator().Peek() == LEX_CHAR_DOT) {
962 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_PERIOD_PERIOD;
963 Iterator().Forward(1);
964 break;
965 }
966
967 Iterator().Backward(1);
968 break;
969 }
970 default: {
971 break;
972 }
973 }
974 }
975
ScanAsterixPunctuator()976 void Lexer::ScanAsterixPunctuator()
977 {
978 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY;
979
980 switch (Iterator().Peek()) {
981 case LEX_CHAR_ASTERISK: {
982 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION;
983 Iterator().Forward(1);
984
985 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
986 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION_EQUAL;
987 Iterator().Forward(1);
988 }
989 break;
990 }
991 case LEX_CHAR_EQUALS: {
992 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY_EQUAL;
993 Iterator().Forward(1);
994 break;
995 }
996 default: {
997 break;
998 }
999 }
1000 }
1001
ScanPercentPunctuator()1002 void Lexer::ScanPercentPunctuator()
1003 {
1004 GetToken().type_ = TokenType::PUNCTUATOR_MOD;
1005
1006 switch (Iterator().Peek()) {
1007 case LEX_CHAR_EQUALS: {
1008 GetToken().type_ = TokenType::PUNCTUATOR_MOD_EQUAL;
1009 Iterator().Forward(1);
1010 break;
1011 }
1012 default: {
1013 break;
1014 }
1015 }
1016 }
1017
IsLineTerminatorOrEos() const1018 bool Lexer::IsLineTerminatorOrEos() const
1019 {
1020 switch (Iterator().PeekCp()) {
1021 case util::StringView::Iterator::INVALID_CP:
1022 case LEX_CHAR_LF:
1023 case LEX_CHAR_CR:
1024 case LEX_CHAR_LS:
1025 case LEX_CHAR_PS: {
1026 return true;
1027 }
1028 default: {
1029 break;
1030 }
1031 }
1032
1033 return false;
1034 }
1035
ScanRegExpPattern()1036 void Lexer::ScanRegExpPattern()
1037 {
1038 bool isCharClass = false;
1039 size_t cpSize {};
1040
1041 while (true) {
1042 switch (Iterator().PeekCp(&cpSize)) {
1043 case util::StringView::Iterator::INVALID_CP:
1044 case LEX_CHAR_LF:
1045 case LEX_CHAR_CR:
1046 case LEX_CHAR_LS:
1047 case LEX_CHAR_PS: {
1048 ThrowError("Unterminated RegExp");
1049 break;
1050 }
1051 case LEX_CHAR_SLASH: {
1052 if (!isCharClass) {
1053 return;
1054 }
1055
1056 break;
1057 }
1058 case LEX_CHAR_LEFT_SQUARE: {
1059 isCharClass = true;
1060 break;
1061 }
1062 case LEX_CHAR_RIGHT_SQUARE: {
1063 isCharClass = false;
1064 break;
1065 }
1066 case LEX_CHAR_BACKSLASH: {
1067 Iterator().Forward(1);
1068
1069 if (IsLineTerminatorOrEos()) {
1070 continue;
1071 }
1072
1073 Iterator().PeekCp(&cpSize);
1074 Iterator().Forward(cpSize);
1075 continue;
1076 }
1077 default: {
1078 break;
1079 }
1080 }
1081
1082 Iterator().Forward(cpSize);
1083 }
1084 }
1085
GetRegExpFlag(char32_t cp,RegExpFlags & flag)1086 bool Lexer::GetRegExpFlag(char32_t cp, RegExpFlags &flag)
1087 {
1088 switch (cp) {
1089 case LEX_CHAR_LOWERCASE_G: {
1090 flag = RegExpFlags::GLOBAL;
1091 break;
1092 }
1093 case LEX_CHAR_LOWERCASE_I: {
1094 flag = RegExpFlags::IGNORE_CASE;
1095 break;
1096 }
1097 case LEX_CHAR_LOWERCASE_M: {
1098 flag = RegExpFlags::MULTILINE;
1099 break;
1100 }
1101 case LEX_CHAR_LOWERCASE_S: {
1102 flag = RegExpFlags::DOTALL;
1103 break;
1104 }
1105 case LEX_CHAR_LOWERCASE_U: {
1106 flag = RegExpFlags::UNICODE;
1107 break;
1108 }
1109 case LEX_CHAR_LOWERCASE_Y: {
1110 flag = RegExpFlags::STICKY;
1111 break;
1112 }
1113 case LEX_CHAR_LOWERCASE_D: {
1114 flag = RegExpFlags::HAS_INDICES;
1115 break;
1116 }
1117 default: {
1118 return false;
1119 }
1120 }
1121 return true;
1122 }
1123
ScanRegExpFlags()1124 RegExpFlags Lexer::ScanRegExpFlags()
1125 {
1126 RegExpFlags resultFlags = RegExpFlags::EMPTY;
1127
1128 while (true) {
1129 size_t cpSize {};
1130 auto cp = Iterator().PeekCp(&cpSize);
1131 if (!KeywordsUtil::IsIdentifierPart(cp)) {
1132 break;
1133 }
1134
1135 Iterator().Forward(cpSize);
1136
1137 RegExpFlags flag = RegExpFlags::EMPTY;
1138
1139 if (!GetRegExpFlag(cp, flag)) {
1140 if (cp == LEX_CHAR_SP) {
1141 return resultFlags;
1142 } else {
1143 ThrowError("Invalid RegExp flag");
1144 }
1145 }
1146
1147 if (flag == RegExpFlags::EMPTY || (resultFlags & flag) != 0) {
1148 ThrowError("Invalid RegExp flag");
1149 }
1150
1151 resultFlags = resultFlags | flag;
1152 }
1153
1154 return resultFlags;
1155 }
1156
ScanRegExp()1157 RegExp Lexer::ScanRegExp()
1158 {
1159 GetToken().type_ = TokenType::LITERAL_REGEXP;
1160
1161 const auto patternStart = Iterator().Index();
1162 ScanRegExpPattern();
1163 const auto pattern = SourceView(patternStart, Iterator().Index());
1164
1165 ASSERT(Iterator().Peek() == LEX_CHAR_SLASH);
1166 Iterator().Forward(1);
1167
1168 const auto flagsStart = Iterator().Index();
1169 RegExpFlags resultFlags = ScanRegExpFlags();
1170 const auto flags = SourceView(flagsStart, Iterator().Index());
1171
1172 SkipWhiteSpaces();
1173 SetTokenEnd();
1174
1175 return {pattern, flags, resultFlags};
1176 }
1177
CheckArrow()1178 bool Lexer::CheckArrow()
1179 {
1180 if (Iterator().Peek() != LEX_CHAR_EQUALS) {
1181 return false;
1182 }
1183 Iterator().Forward(1);
1184
1185 bool res = Iterator().Peek() == LEX_CHAR_GREATER_THAN;
1186 Iterator().Backward(1);
1187
1188 return res;
1189 }
1190
SetTokenStart()1191 void Lexer::SetTokenStart()
1192 {
1193 if (pos_.nextTokenLine != 0) {
1194 pos_.line += pos_.nextTokenLine;
1195 pos_.nextTokenLine = 0;
1196 GetToken().flags_ = TokenFlags::NEW_LINE;
1197 } else {
1198 GetToken().flags_ = TokenFlags::NONE;
1199 }
1200
1201 pos_.token.loc_.start = SourcePosition {Iterator().Index(), pos_.line};
1202 }
1203
SetTokenEnd()1204 void Lexer::SetTokenEnd()
1205 {
1206 pos_.token.loc_.end = SourcePosition {Iterator().Index(), pos_.line};
1207 }
1208
CheckAwaitKeyword()1209 void Lexer::CheckAwaitKeyword()
1210 {
1211 if (parserContext_->IsStaticBlock()) {
1212 ThrowError("'await' is not allowed in class static block");
1213 }
1214 // support top level await for module
1215 if (!parserContext_->IsAsync()) {
1216 if (!parserContext_->IsModule() || parserContext_->GetProgram()->IsDtsFile()) {
1217 GetToken().type_ = TokenType::LITERAL_IDENT;
1218 return;
1219 }
1220 if (parserContext_->GetProgram()->Extension() == ScriptExtension::TS) {
1221 if (parserContext_->IsTsModule()) {
1222 GetToken().type_ = TokenType::LITERAL_IDENT;
1223 return;
1224 }
1225 }
1226 }
1227
1228 if (parserContext_->DisallowAwait()) {
1229 ThrowError("'await' is not allowed");
1230 }
1231 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1232 ThrowError("Keyword must not contain escaped characters");
1233 }
1234 GetToken().type_ = TokenType::KEYW_AWAIT;
1235 }
1236
CheckArgumentsKeyword()1237 void Lexer::CheckArgumentsKeyword()
1238 {
1239 if (parserContext_->DisallowArguments()) {
1240 ThrowError("'arguments' is not allowed in static block and field initializer");
1241 }
1242 }
1243
CheckKeywordEscape(TokenType type)1244 void Lexer::CheckKeywordEscape(TokenType type)
1245 {
1246 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1247 ThrowError("Escape sequences are not allowed in keywords");
1248 }
1249
1250 GetToken().type_ = type;
1251 }
1252
CheckEnumKeyword()1253 void Lexer::CheckEnumKeyword()
1254 {
1255 if (parserContext_->GetProgram()->Extension() == ScriptExtension::JS) {
1256 ThrowError("Unexpected reserved keyword");
1257 }
1258
1259 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1260 ThrowError("Escape sequences are not allowed in keywords");
1261 }
1262
1263 GetToken().type_ = TokenType::LITERAL_IDENT;
1264 }
1265
CheckLetKeyword()1266 void Lexer::CheckLetKeyword()
1267 {
1268 GetToken().type_ = TokenType::KEYW_LET;
1269 }
1270
CheckYieldKeyword()1271 void Lexer::CheckYieldKeyword()
1272 {
1273 if (!parserContext_->AllowYield()) {
1274 ThrowError("'yield' is not allowed");
1275 }
1276
1277 GetToken().type_ = TokenType::KEYW_YIELD;
1278 }
1279
CheckFutureReservedKeyword(TokenType keywordType)1280 void Lexer::CheckFutureReservedKeyword(TokenType keywordType)
1281 {
1282 GetToken().type_ = TokenType::LITERAL_IDENT;
1283
1284 if (parserContext_->GetProgram()->Extension() == ScriptExtension::TS && keywordType <= TokenType::KEYW_INTERFACE) {
1285 return;
1286 }
1287
1288 ThrowError("Unexpected strict mode reserved keyword");
1289 }
1290
SkipWhiteSpaces()1291 void Lexer::SkipWhiteSpaces()
1292 {
1293 while (true) {
1294 auto cp = Iterator().Peek();
1295
1296 switch (cp) {
1297 case LEX_CHAR_HASH_MARK: {
1298 Iterator().Forward(1);
1299 cp = Iterator().Peek();
1300 if (cp != LEX_CHAR_EXCLAMATION) {
1301 Iterator().Backward(1);
1302 return;
1303 }
1304 if (Iterator().Index() != 1) {
1305 /*
1306 * according to ECMA-262 specification item 12.5 Hashbang Comments are location-sensitive.
1307 * only allowed occurs at the beginning of files, other position is illegal.
1308 */
1309 Iterator().Backward(1);
1310 ThrowError("Invalid or unexpected token");
1311 }
1312
1313 Iterator().Forward(1);
1314 SkipSingleLineComment();
1315 continue;
1316 }
1317 case LEX_CHAR_CR: {
1318 Iterator().Forward(1);
1319
1320 if (Iterator().Peek() != LEX_CHAR_LF) {
1321 Iterator().Backward(1);
1322 }
1323
1324 [[fallthrough]];
1325 }
1326 case LEX_CHAR_LF: {
1327 Iterator().Forward(1);
1328 pos_.nextTokenLine++;
1329 continue;
1330 }
1331 case LEX_CHAR_VT:
1332 case LEX_CHAR_FF:
1333 case LEX_CHAR_SP:
1334 case LEX_CHAR_TAB: {
1335 Iterator().Forward(1);
1336 continue;
1337 }
1338 case LEX_CHAR_SLASH: {
1339 Iterator().Forward(1);
1340 cp = Iterator().Peek();
1341 if (cp == LEX_CHAR_SLASH) {
1342 Iterator().Forward(1);
1343 SkipSingleLineComment();
1344 continue;
1345 }
1346 if (cp == LEX_CHAR_ASTERISK) {
1347 Iterator().Forward(1);
1348 SkipMultiLineComment();
1349 continue;
1350 }
1351
1352 Iterator().Backward(1);
1353 return;
1354 }
1355 default: {
1356 if (cp < LEX_ASCII_MAX_BITS) {
1357 return;
1358 }
1359
1360 size_t cpSize {};
1361 cp = Iterator().PeekCp(&cpSize);
1362
1363 switch (cp) {
1364 case LEX_CHAR_LS:
1365 case LEX_CHAR_PS: {
1366 pos_.nextTokenLine++;
1367 [[fallthrough]];
1368 }
1369 case LEX_CHAR_NBSP:
1370 case LEX_CHAR_NLINE:
1371 case LEX_CHAR_IGSP:
1372 case LEX_CHAR_ZWNBSP: {
1373 Iterator().Forward(cpSize);
1374 continue;
1375 }
1376 default: {
1377 return;
1378 }
1379 }
1380 }
1381 }
1382 }
1383 }
1384
1385 // NOLINTNEXTLINE(readability-function-size)
NextToken(LexerNextTokenFlags flags)1386 void Lexer::NextToken(LexerNextTokenFlags flags)
1387 {
1388 Keywords kws(this, flags);
1389 KeywordsUtil &kwu = kws.Util();
1390
1391 SetTokenStart();
1392
1393 auto cp = Iterator().Peek();
1394 Iterator().Forward(1);
1395
1396 GetToken().keywordType_ = TokenType::EOS;
1397
1398 switch (cp) {
1399 case LEX_CHAR_EXCLAMATION: {
1400 ScanExclamationPunctuator();
1401 break;
1402 }
1403 case LEX_CHAR_SINGLE_QUOTE: {
1404 ScanString<LEX_CHAR_SINGLE_QUOTE>();
1405 break;
1406 }
1407 case LEX_CHAR_DOUBLE_QUOTE: {
1408 ScanString<LEX_CHAR_DOUBLE_QUOTE>();
1409 break;
1410 }
1411 case LEX_CHAR_HASH_MARK: {
1412 GetToken().type_ = TokenType::PUNCTUATOR_HASH_MARK;
1413 break;
1414 }
1415 case LEX_CHAR_PERCENT: {
1416 ScanPercentPunctuator();
1417 break;
1418 }
1419 case LEX_CHAR_AMPERSAND: {
1420 ScanAmpersandPunctuator();
1421 break;
1422 }
1423 case LEX_CHAR_LEFT_PAREN: {
1424 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_PARENTHESIS;
1425 break;
1426 }
1427 case LEX_CHAR_RIGHT_PAREN: {
1428 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_PARENTHESIS;
1429 break;
1430 }
1431 case LEX_CHAR_ASTERISK: {
1432 ScanAsterixPunctuator();
1433 break;
1434 }
1435 case LEX_CHAR_PLUS: {
1436 ScanPlusPunctuator();
1437 break;
1438 }
1439 case LEX_CHAR_COMMA: {
1440 GetToken().type_ = TokenType::PUNCTUATOR_COMMA;
1441 break;
1442 }
1443 case LEX_CHAR_MINUS: {
1444 ScanMinusPunctuator();
1445 break;
1446 }
1447 case LEX_CHAR_DOT: {
1448 ScanDotPunctuator();
1449 break;
1450 }
1451 case LEX_CHAR_SLASH: {
1452 ScanSlashPunctuator();
1453 break;
1454 }
1455 case LEX_CHAR_0: {
1456 ScanNumberLeadingZero();
1457 break;
1458 }
1459 case LEX_CHAR_1:
1460 case LEX_CHAR_2:
1461 case LEX_CHAR_3:
1462 case LEX_CHAR_4:
1463 case LEX_CHAR_5:
1464 case LEX_CHAR_6:
1465 case LEX_CHAR_7:
1466 case LEX_CHAR_8:
1467 case LEX_CHAR_9: {
1468 ScanNumber();
1469 break;
1470 }
1471 case LEX_CHAR_COLON: {
1472 GetToken().type_ = TokenType::PUNCTUATOR_COLON;
1473 break;
1474 }
1475 case LEX_CHAR_SEMICOLON: {
1476 GetToken().type_ = TokenType::PUNCTUATOR_SEMI_COLON;
1477 break;
1478 }
1479 case LEX_CHAR_LESS_THAN: {
1480 ScanLessThanPunctuator();
1481 break;
1482 }
1483 case LEX_CHAR_EQUALS: {
1484 ScanEqualsPunctuator();
1485 break;
1486 }
1487 case LEX_CHAR_GREATER_THAN: {
1488 ScanGreaterThanPunctuator();
1489 break;
1490 }
1491 case LEX_CHAR_QUESTION: {
1492 ScanQuestionPunctuator();
1493 break;
1494 }
1495 case LEX_CHAR_AT: {
1496 GetToken().type_ = TokenType::PUNCTUATOR_AT;
1497 break;
1498 }
1499 case LEX_CHAR_DOLLAR_SIGN:
1500 case LEX_CHAR_UPPERCASE_A:
1501 case LEX_CHAR_UPPERCASE_B:
1502 case LEX_CHAR_UPPERCASE_C:
1503 case LEX_CHAR_UPPERCASE_D:
1504 case LEX_CHAR_UPPERCASE_E:
1505 case LEX_CHAR_UPPERCASE_F:
1506 case LEX_CHAR_UPPERCASE_G:
1507 case LEX_CHAR_UPPERCASE_H:
1508 case LEX_CHAR_UPPERCASE_I:
1509 case LEX_CHAR_UPPERCASE_J:
1510 case LEX_CHAR_UPPERCASE_K:
1511 case LEX_CHAR_UPPERCASE_L:
1512 case LEX_CHAR_UPPERCASE_M:
1513 case LEX_CHAR_UPPERCASE_N:
1514 case LEX_CHAR_UPPERCASE_O:
1515 case LEX_CHAR_UPPERCASE_P:
1516 case LEX_CHAR_UPPERCASE_Q:
1517 case LEX_CHAR_UPPERCASE_R:
1518 case LEX_CHAR_UPPERCASE_S:
1519 case LEX_CHAR_UPPERCASE_T:
1520 case LEX_CHAR_UPPERCASE_U:
1521 case LEX_CHAR_UPPERCASE_V:
1522 case LEX_CHAR_UPPERCASE_W:
1523 case LEX_CHAR_UPPERCASE_X:
1524 case LEX_CHAR_UPPERCASE_Y:
1525 case LEX_CHAR_UPPERCASE_Z:
1526 case LEX_CHAR_UNDERSCORE:
1527 case LEX_CHAR_LOWERCASE_H:
1528 case LEX_CHAR_LOWERCASE_J:
1529 case LEX_CHAR_LOWERCASE_Q:
1530 case LEX_CHAR_LOWERCASE_X:
1531 case LEX_CHAR_LOWERCASE_Z: {
1532 kwu.ScanIdContinue();
1533 break;
1534 }
1535 case LEX_CHAR_LEFT_SQUARE: {
1536 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SQUARE_BRACKET;
1537 break;
1538 }
1539 case LEX_CHAR_BACKSLASH: {
1540 GetToken().flags_ |= TokenFlags::HAS_ESCAPE;
1541
1542 if (Iterator().Peek() != LEX_CHAR_LOWERCASE_U) {
1543 ThrowError("Invalid character");
1544 }
1545
1546 cp = ScanUnicodeEscapeSequence();
1547
1548 kwu.ScanIdentifierStart(cp);
1549 break;
1550 }
1551 case LEX_CHAR_RIGHT_SQUARE: {
1552 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SQUARE_BRACKET;
1553 break;
1554 }
1555 case LEX_CHAR_CIRCUMFLEX: {
1556 ScanCircumflexPunctuator();
1557 break;
1558 }
1559 case LEX_CHAR_BACK_TICK: {
1560 GetToken().type_ = TokenType::PUNCTUATOR_BACK_TICK;
1561 SetTokenEnd();
1562 return;
1563 }
1564 case LEX_CHAR_LOWERCASE_A: {
1565 kws.ScanA();
1566 break;
1567 }
1568 case LEX_CHAR_LOWERCASE_B: {
1569 kws.ScanB();
1570 break;
1571 }
1572 case LEX_CHAR_LOWERCASE_C: {
1573 kws.ScanC();
1574 break;
1575 }
1576 case LEX_CHAR_LOWERCASE_D: {
1577 kws.ScanD();
1578 break;
1579 }
1580 case LEX_CHAR_LOWERCASE_E: {
1581 kws.ScanE();
1582 break;
1583 }
1584 case LEX_CHAR_LOWERCASE_F: {
1585 kws.ScanF();
1586 break;
1587 }
1588 case LEX_CHAR_LOWERCASE_G: {
1589 kws.ScanG();
1590 break;
1591 }
1592 case LEX_CHAR_LOWERCASE_I: {
1593 kws.ScanI();
1594 break;
1595 }
1596 case LEX_CHAR_LOWERCASE_K: {
1597 kws.ScanK();
1598 break;
1599 }
1600 case LEX_CHAR_LOWERCASE_L: {
1601 kws.ScanL();
1602 break;
1603 }
1604 case LEX_CHAR_LOWERCASE_M: {
1605 kws.ScanM();
1606 break;
1607 }
1608 case LEX_CHAR_LOWERCASE_N: {
1609 kws.ScanN();
1610 break;
1611 }
1612 case LEX_CHAR_LOWERCASE_O: {
1613 kws.ScanO();
1614 break;
1615 }
1616 case LEX_CHAR_LOWERCASE_P: {
1617 kws.ScanP();
1618 break;
1619 }
1620 case LEX_CHAR_LOWERCASE_R: {
1621 kws.ScanR();
1622 break;
1623 }
1624 case LEX_CHAR_LOWERCASE_S: {
1625 kws.ScanS();
1626 break;
1627 }
1628 case LEX_CHAR_LOWERCASE_T: {
1629 kws.ScanT();
1630 break;
1631 }
1632 case LEX_CHAR_LOWERCASE_U: {
1633 kws.ScanU();
1634 break;
1635 }
1636 case LEX_CHAR_LOWERCASE_V: {
1637 kws.ScanV();
1638 break;
1639 }
1640 case LEX_CHAR_LOWERCASE_W: {
1641 kws.ScanW();
1642 break;
1643 }
1644 case LEX_CHAR_LOWERCASE_Y: {
1645 kws.ScanY();
1646 break;
1647 }
1648 case LEX_CHAR_LEFT_BRACE: {
1649 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_BRACE;
1650
1651 if (tlCtx_) {
1652 tlCtx_->ConsumeLeftBrace();
1653 }
1654
1655 break;
1656 }
1657 case LEX_CHAR_VLINE: {
1658 ScanVLinePunctuator();
1659 break;
1660 }
1661 case LEX_CHAR_RIGHT_BRACE: {
1662 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_BRACE;
1663
1664 if (tlCtx_ && tlCtx_->ConsumeRightBrace()) {
1665 SetTokenEnd();
1666 return;
1667 }
1668
1669 break;
1670 }
1671 case LEX_CHAR_TILDE: {
1672 GetToken().type_ = TokenType::PUNCTUATOR_TILDE;
1673 break;
1674 }
1675 default: {
1676 Iterator().Backward(1);
1677
1678 if (cp == util::StringView::Iterator::INVALID_CP) {
1679 GetToken().type_ = TokenType::EOS;
1680 break;
1681 }
1682
1683 cp = Iterator().Next();
1684 kwu.ScanIdentifierStart(cp);
1685 break;
1686 }
1687 }
1688
1689 SetTokenEnd();
1690 SkipWhiteSpaces();
1691 }
1692
AssignTokenEscapeError()1693 void Lexer::AssignTokenEscapeError()
1694 {
1695 GetToken().flags_ |= TokenFlags::ESCAPE_ERROR;
1696 }
1697
AssignTokenTaggedTemplate()1698 void Lexer::AssignTokenTaggedTemplate()
1699 {
1700 GetToken().flags_ |= TokenFlags::TAGGED_TEMPLATE;
1701 }
1702
CheckTokenIsTaggedTemplate() const1703 bool Lexer::CheckTokenIsTaggedTemplate() const
1704 {
1705 return GetToken().IsTaggedTemplate();
1706 }
1707
1708 } // namespace panda::es2panda::lexer
1709