1 /**
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "lexer.h"
17
18 #include <es2panda.h>
19 #include <gen/keywords.h>
20 #include <lexer/token/letters.h>
21 #include <lexer/token/tokenType.h>
22 #include <parser/context/parserContext.h>
23
24 #include <array>
25
26 namespace panda::es2panda::lexer {
27
LexerPosition(const util::StringView & source)28 LexerPosition::LexerPosition(const util::StringView &source) : iterator(source) {}
29
Lexer(const parser::ParserContext * parserContext)30 Lexer::Lexer(const parser::ParserContext *parserContext)
31 : allocator_(parserContext->GetProgram()->Allocator()),
32 parserContext_(parserContext),
33 source_(parserContext->GetProgram()->SourceCode()),
34 pos_(source_)
35 {
36 SkipWhiteSpaces();
37 }
38
ScanUnicodeEscapeSequence()39 char32_t Lexer::ScanUnicodeEscapeSequence()
40 {
41 ASSERT(Iterator().Peek() == LEX_CHAR_LOWERCASE_U);
42
43 Iterator().Forward(1);
44
45 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
46 Iterator().Forward(1);
47 return ScanUnicodeCodePointEscape();
48 }
49
50 // 4: a template parameter to the expected fixed length when scanning Unicode escape sequences
51 return ScanHexEscape<4>();
52 }
53
ScanUnicodeCodePointEscape()54 char32_t Lexer::ScanUnicodeCodePointEscape()
55 {
56 double code = 0;
57 char32_t cp {};
58
59 while (true) {
60 cp = Iterator().Peek();
61 if (!IsHexDigit(cp)) {
62 break;
63 }
64
65 Iterator().Forward(1);
66
67 constexpr auto multiplier = 16;
68 code = code * multiplier + HexValue(cp);
69 if (code > UNICODE_CODE_POINT_MAX) {
70 if (CheckTokenIsTaggedTemplate()) {
71 AssignTokenEscapeError();
72 break;
73 }
74 ThrowError("Invalid unicode escape sequence");
75 }
76 }
77 if (cp != LEX_CHAR_RIGHT_BRACE) {
78 if (CheckTokenIsTaggedTemplate()) {
79 AssignTokenEscapeError();
80 return static_cast<char32_t>(code);
81 } else {
82 ThrowError("Invalid unicode escape sequence");
83 }
84 }
85
86 Iterator().Forward(1);
87 return static_cast<char32_t>(code);
88 }
89
Allocator()90 ArenaAllocator *Lexer::Allocator()
91 {
92 return allocator_;
93 }
94
GetToken()95 Token &Lexer::GetToken()
96 {
97 return pos_.token;
98 }
99
GetToken() const100 const Token &Lexer::GetToken() const
101 {
102 return pos_.token;
103 }
104
Line() const105 size_t Lexer::Line() const
106 {
107 return pos_.line;
108 }
109
Save() const110 LexerPosition Lexer::Save() const
111 {
112 return pos_;
113 }
114
BackwardToken(TokenType type,size_t offset)115 void Lexer::BackwardToken(TokenType type, size_t offset)
116 {
117 pos_.token.type_ = type;
118 pos_.iterator.Reset(GetToken().End().index - offset);
119 pos_.nextTokenLine = 0;
120 }
121
ForwardToken(TokenType type,size_t offset)122 void Lexer::ForwardToken(TokenType type, size_t offset)
123 {
124 SetTokenStart();
125 pos_.token.type_ = type;
126 pos_.iterator.Forward(offset);
127 SetTokenEnd();
128 SkipWhiteSpaces();
129 }
130
Rewind(const LexerPosition & pos)131 void Lexer::Rewind(const LexerPosition &pos)
132 {
133 pos_ = pos;
134 }
135
Lookahead()136 char32_t Lexer::Lookahead()
137 {
138 return Iterator().Peek();
139 }
140
// Returns the view of the source delimited by the indices of the two iterators.
// Delegates to the index-based overload.
util::StringView Lexer::SourceView(const util::StringView::Iterator &begin, const util::StringView::Iterator &end) const
{
    const auto from = begin.Index();
    const auto to = end.Index();
    return SourceView(from, to);
}
145
SourceView(size_t begin,size_t end) const146 util::StringView Lexer::SourceView(size_t begin, size_t end) const
147 {
148 return source_.Substr(begin, end);
149 }
150
SkipMultiLineComment()151 void Lexer::SkipMultiLineComment()
152 {
153 while (true) {
154 switch (Iterator().Next()) {
155 case util::StringView::Iterator::INVALID_CP: {
156 ThrowError("Unterminated multi-line comment");
157 break;
158 }
159 case LEX_CHAR_CR: {
160 if (Iterator().Peek() == LEX_CHAR_LF) {
161 Iterator().Forward(1);
162 }
163
164 [[fallthrough]];
165 }
166 case LEX_CHAR_LF:
167 case LEX_CHAR_LS:
168 case LEX_CHAR_PS: {
169 pos_.nextTokenLine++;
170 continue;
171 }
172 case LEX_CHAR_ASTERISK: {
173 if (Iterator().Peek() == LEX_CHAR_SLASH) {
174 Iterator().Forward(1);
175 return;
176 }
177
178 break;
179 }
180 default: {
181 break;
182 }
183 }
184 }
185 }
186
187 /* New line character is not processed */
SkipSingleLineComment()188 void Lexer::SkipSingleLineComment()
189 {
190 while (true) {
191 // INVALID_CP may appear in the middle of a comment
192 // It can not be used to determine the end of the comment.
193 if (!Iterator().HasNext()) {
194 Iterator().Next();
195 pos_.nextTokenLine++;
196 return;
197 }
198 switch (Iterator().Next()) {
199 case util::StringView::Iterator::INVALID_CP: {
200 // This return means if an INVALID_CP appeared in a single comment,
201 // it will terminates single comment,
202 // but INVALID_CP should not terminate single comment,
203 // only the end of the Iterator can terminates single comment and add pos_.nextTokenLine,
204 // this return should be removed in another issure because it is an incompatible bug fix.
205 return;
206 }
207 case LEX_CHAR_CR: {
208 if (Iterator().Peek() == LEX_CHAR_LF) {
209 Iterator().Forward(1);
210 }
211
212 [[fallthrough]];
213 }
214 case LEX_CHAR_LF:
215 case LEX_CHAR_LS:
216 case LEX_CHAR_PS: {
217 pos_.nextTokenLine++;
218 return;
219 }
220 default: {
221 break;
222 }
223 }
224 }
225 }
226
ThrowError(std::string_view message)227 void Lexer::ThrowError(std::string_view message)
228 {
229 lexer::LineIndex lineIndex = parserContext_->GetProgram()->GetLineIndex();
230 SourceLocation loc = lineIndex.GetLocation(SourcePosition(Iterator().Index(), pos_.line + pos_.nextTokenLine));
231 throw es2panda::Error(es2panda::ErrorType::SYNTAX, message, loc.line, loc.col);
232 }
233
CheckNumberLiteralEnd()234 void Lexer::CheckNumberLiteralEnd()
235 {
236 if (Iterator().Peek() == LEX_CHAR_LOWERCASE_N) {
237 Iterator().Forward(1);
238 GetToken().flags_ |= TokenFlags::NUMBER_BIGINT;
239 }
240
241 GetToken().src_ = SourceView(GetToken().Start().index, Iterator().Index());
242 const auto nextCp = Iterator().PeekCp();
243 if (KeywordsUtil::IsIdentifierStart(nextCp) || IsDecimalDigit(nextCp)) {
244 ThrowError("Invalid numeric literal");
245 }
246 }
247
ScanNumberLeadingZero()248 void Lexer::ScanNumberLeadingZero()
249 {
250 GetToken().type_ = TokenType::LITERAL_NUMBER;
251
252 switch (Iterator().Peek()) {
253 case LEX_CHAR_LOWERCASE_X:
254 case LEX_CHAR_UPPERCASE_X: {
255 Iterator().Forward(1);
256 constexpr auto RADIX = 16;
257 ScanNumberRadix<IsHexDigit, RADIX>();
258 CheckNumberLiteralEnd();
259 return;
260 }
261 case LEX_CHAR_LOWERCASE_B:
262 case LEX_CHAR_UPPERCASE_B: {
263 Iterator().Forward(1);
264 constexpr auto RADIX = 2;
265 ScanNumberRadix<IsBinaryDigit, RADIX>();
266 CheckNumberLiteralEnd();
267 return;
268 }
269 case LEX_CHAR_LOWERCASE_O:
270 case LEX_CHAR_UPPERCASE_O: {
271 Iterator().Forward(1);
272 constexpr auto RADIX = 8;
273 ScanNumberRadix<IsOctalDigit, RADIX>();
274
275 switch (Iterator().Peek()) {
276 case LEX_CHAR_8:
277 case LEX_CHAR_9: {
278 ThrowError("Invalid octal digit");
279 }
280 default: {
281 break;
282 }
283 }
284
285 CheckNumberLiteralEnd();
286 return;
287 }
288 case LEX_CHAR_0:
289 case LEX_CHAR_1:
290 case LEX_CHAR_2:
291 case LEX_CHAR_3:
292 case LEX_CHAR_4:
293 case LEX_CHAR_5:
294 case LEX_CHAR_6:
295 case LEX_CHAR_7: {
296 ThrowError("Implicit octal literal not allowed");
297 break;
298 }
299 case LEX_CHAR_8:
300 case LEX_CHAR_9: {
301 ThrowError("NonOctalDecimalIntegerLiteral is not enabled in strict mode code");
302 break;
303 }
304 default: {
305 break;
306 }
307 }
308
309 ScanNumber(Iterator().Peek() == LEX_CHAR_0);
310 }
311
ScanDecimalNumbers(bool allowNumericSeparator)312 void Lexer::ScanDecimalNumbers(bool allowNumericSeparator)
313 {
314 bool allowNumericOnNext = true;
315
316 while (true) {
317 switch (Iterator().Peek()) {
318 case LEX_CHAR_0:
319 case LEX_CHAR_1:
320 case LEX_CHAR_2:
321 case LEX_CHAR_3:
322 case LEX_CHAR_4:
323 case LEX_CHAR_5:
324 case LEX_CHAR_6:
325 case LEX_CHAR_7:
326 case LEX_CHAR_8:
327 case LEX_CHAR_9: {
328 Iterator().Forward(1);
329 allowNumericOnNext = true;
330 break;
331 }
332 case LEX_CHAR_UNDERSCORE: {
333 Iterator().Backward(1);
334 isUnderscore_ = true;
335
336 if (Iterator().Peek() == LEX_CHAR_DOT || !allowNumericSeparator || !allowNumericOnNext) {
337 Iterator().Forward(1);
338 ThrowError("Invalid numeric separator");
339 }
340
341 GetToken().flags_ |= TokenFlags::NUMBER_HAS_UNDERSCORE;
342 Iterator().Forward(2);
343 allowNumericOnNext = false;
344 break;
345 }
346 default: {
347 if (!allowNumericOnNext) {
348 ThrowError("Numeric separators are not allowed at the end of numeric literals");
349 }
350 return;
351 }
352 }
353 }
354 }
355
ConvertNumber(size_t exponentSignPos)356 void Lexer::ConvertNumber(size_t exponentSignPos)
357 {
358 util::StringView sv = SourceView(GetToken().Start().index, Iterator().Index());
359 std::string utf8 = std::string {sv.Utf8()};
360 bool needConversion = false;
361
362 if (exponentSignPos != std::numeric_limits<size_t>::max()) {
363 utf8.insert(exponentSignPos, 1, '+');
364 needConversion = true;
365 }
366
367 if (GetToken().flags_ & TokenFlags::NUMBER_HAS_UNDERSCORE) {
368 utf8.erase(std::remove(utf8.begin(), utf8.end(), LEX_CHAR_UNDERSCORE), utf8.end());
369 needConversion = true;
370 }
371
372 if (needConversion) {
373 util::UString converted(utf8, Allocator());
374 GetToken().src_ = converted.View();
375 } else {
376 GetToken().src_ = sv;
377 }
378
379 try {
380 GetToken().number_ = static_cast<double>(std::stold(utf8, nullptr));
381 } catch (const std::invalid_argument &) {
382 ThrowError("Invalid number");
383 } catch (const std::out_of_range &) {
384 // TODO(frobert): look for a more elegant solution to this
385 GetToken().number_ = std::numeric_limits<double>::infinity();
386 }
387 }
ScanNumber(bool allowNumericSeparator,bool allowBigInt)388 void Lexer::ScanNumber(bool allowNumericSeparator, bool allowBigInt)
389 {
390 GetToken().type_ = TokenType::LITERAL_NUMBER;
391
392 ScanDecimalNumbers(allowNumericSeparator);
393
394 size_t exponentSignPos = std::numeric_limits<size_t>::max();
395 bool parseExponent = true;
396
397 if (Iterator().Peek() == LEX_CHAR_DOT) {
398 allowBigInt = false;
399 Iterator().Forward(1);
400
401 auto cp = Iterator().Peek();
402 if (IsDecimalDigit(cp) || cp == LEX_CHAR_LOWERCASE_E || LEX_CHAR_UPPERCASE_E) {
403 ScanDecimalNumbers(allowNumericSeparator);
404 } else {
405 parseExponent = false;
406 }
407 }
408
409 switch (Iterator().Peek()) {
410 case LEX_CHAR_LOWERCASE_E:
411 case LEX_CHAR_UPPERCASE_E: {
412 allowBigInt = false;
413
414 if (!parseExponent) {
415 break;
416 }
417
418 Iterator().Forward(1);
419
420 switch (Iterator().Peek()) {
421 case LEX_CHAR_UNDERSCORE: {
422 break;
423 }
424 case LEX_CHAR_PLUS:
425 case LEX_CHAR_MINUS: {
426 Iterator().Forward(1);
427 break;
428 }
429 default: {
430 exponentSignPos = Iterator().Index() - GetToken().Start().index;
431 break;
432 }
433 }
434
435 if (!IsDecimalDigit(Iterator().Peek())) {
436 ThrowError("Invalid numeric literal");
437 }
438 ScanDecimalNumbers(allowNumericSeparator);
439 break;
440 }
441 default: {
442 break;
443 }
444 }
445
446 CheckNumberLiteralEnd();
447
448 if (GetToken().flags_ & TokenFlags::NUMBER_BIGINT) {
449 if (!allowBigInt) {
450 ThrowError("Invalid BigInt number");
451 }
452 if (isUnderscore_) {
453 ConvertNumber(exponentSignPos);
454 isUnderscore_ = false;
455 }
456
457 return;
458 }
459
460 ConvertNumber(exponentSignPos);
461 }
462
PushTemplateContext(TemplateLiteralParserContext * ctx)463 void Lexer::PushTemplateContext(TemplateLiteralParserContext *ctx)
464 {
465 tlCtx_ = ctx;
466 }
467
ScanTemplateStringEnd()468 void Lexer::ScanTemplateStringEnd()
469 {
470 ASSERT(Iterator().Peek() == LEX_CHAR_BACK_TICK);
471 Iterator().Forward(1);
472 SetTokenEnd();
473 SkipWhiteSpaces();
474 }
475
ScanTemplateString()476 LexerTemplateString Lexer::ScanTemplateString()
477 {
478 LexerTemplateString templateStr(Allocator());
479 size_t cpSize = 0;
480
481 while (true) {
482 char32_t cp = Iterator().PeekCp(&cpSize);
483
484 switch (cp) {
485 case util::StringView::Iterator::INVALID_CP: {
486 ThrowError("Unexpected token, expected '${' or '`'");
487 break;
488 }
489 case LEX_CHAR_BACK_TICK: {
490 templateStr.end = Iterator().Index();
491 return templateStr;
492 }
493 case LEX_CHAR_CR: {
494 Iterator().Forward(1);
495
496 if (Iterator().Peek() != LEX_CHAR_LF) {
497 Iterator().Backward(1);
498 }
499
500 [[fallthrough]];
501 }
502 case LEX_CHAR_LF: {
503 pos_.line++;
504 templateStr.str.Append(LEX_CHAR_LF);
505 Iterator().Forward(1);
506 continue;
507 }
508 case LEX_CHAR_BACKSLASH: {
509 Iterator().Forward(1);
510
511 char32_t nextCp = Iterator().Peek();
512 if (nextCp == LEX_CHAR_BACK_TICK || nextCp == LEX_CHAR_BACKSLASH || nextCp == LEX_CHAR_DOLLAR_SIGN) {
513 templateStr.str.Append(cp);
514 templateStr.str.Append(nextCp);
515 Iterator().Forward(1);
516 continue;
517 }
518
519 Iterator().Backward(1);
520 break;
521 }
522 case LEX_CHAR_DOLLAR_SIGN: {
523 templateStr.end = Iterator().Index();
524 Iterator().Forward(1);
525
526 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
527 Iterator().Forward(1);
528 templateStr.scanExpression = true;
529 SkipWhiteSpaces();
530 return templateStr;
531 }
532
533 templateStr.str.Append(cp);
534 continue;
535 }
536 default: {
537 break;
538 }
539 }
540
541 templateStr.str.Append(cp);
542 Iterator().Forward(cpSize);
543 }
544
545 UNREACHABLE();
546 return templateStr;
547 }
548
ResetTokenEnd()549 void Lexer::ResetTokenEnd()
550 {
551 SetTokenStart();
552 pos_.iterator.Reset(GetToken().End().index);
553 pos_.line = GetToken().End().line;
554 pos_.nextTokenLine = 0;
555 }
556
ScanStringUnicodePart(util::UString * str)557 void Lexer::ScanStringUnicodePart(util::UString *str)
558 {
559 size_t cpSize {};
560 char32_t cp = Iterator().PeekCp(&cpSize);
561
562 switch (cp) {
563 case util::StringView::Iterator::INVALID_CP: {
564 ThrowError("Unterminated string");
565 break;
566 }
567 case LEX_CHAR_CR: {
568 Iterator().Forward(1);
569 if (Iterator().Peek() != LEX_CHAR_LF) {
570 Iterator().Backward(1);
571 }
572
573 [[fallthrough]];
574 }
575 case LEX_CHAR_LS:
576 case LEX_CHAR_PS:
577 case LEX_CHAR_LF: {
578 pos_.line++;
579 Iterator().Forward(cpSize);
580 return;
581 }
582 case LEX_CHAR_LOWERCASE_B: {
583 cp = LEX_CHAR_BS;
584 break;
585 }
586 case LEX_CHAR_LOWERCASE_T: {
587 cp = LEX_CHAR_TAB;
588 break;
589 }
590 case LEX_CHAR_LOWERCASE_N: {
591 cp = LEX_CHAR_LF;
592 break;
593 }
594 case LEX_CHAR_LOWERCASE_V: {
595 cp = LEX_CHAR_VT;
596 break;
597 }
598 case LEX_CHAR_LOWERCASE_F: {
599 cp = LEX_CHAR_FF;
600 break;
601 }
602 case LEX_CHAR_LOWERCASE_R: {
603 cp = LEX_CHAR_CR;
604 break;
605 }
606 case LEX_CHAR_LOWERCASE_X: {
607 Iterator().Forward(1);
608 // 2: a template parameter to the expected fixed length when scanning Unicode escape sequences
609 str->Append(ScanHexEscape<2>());
610 return;
611 }
612 case LEX_CHAR_LOWERCASE_U: {
613 cp = ScanUnicodeEscapeSequence();
614 str->Append(cp);
615 return;
616 }
617 case LEX_CHAR_0: {
618 Iterator().Forward(1);
619 bool isDecimal = IsDecimalDigit(Iterator().Peek());
620 bool isOctal = IsOctalDigit(Iterator().Peek());
621 Iterator().Backward(1);
622
623 if (!isDecimal) {
624 cp = LEX_CHAR_NULL;
625 break;
626 }
627
628 if (isOctal) {
629 if (CheckTokenIsTaggedTemplate()) {
630 AssignTokenEscapeError();
631 break;
632 }
633 ThrowError("Octal escape sequences are not allowed in strict mode");
634 }
635
636 [[fallthrough]];
637 }
638 default: {
639 if (IsDecimalDigit(Iterator().Peek())) {
640 if (CheckTokenIsTaggedTemplate()) {
641 AssignTokenEscapeError();
642 break;
643 }
644 ThrowError("Invalid character escape sequence in strict mode");
645 }
646
647 break;
648 }
649 }
650
651 Iterator().Forward(cpSize);
652 str->Append(cp);
653 }
654
ScanQuestionPunctuator()655 void Lexer::ScanQuestionPunctuator()
656 {
657 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_MARK;
658
659 switch (Iterator().Peek()) {
660 case LEX_CHAR_QUESTION: {
661 GetToken().type_ = TokenType::PUNCTUATOR_NULLISH_COALESCING;
662 Iterator().Forward(1);
663
664 switch (Iterator().Peek()) {
665 case LEX_CHAR_EQUALS: {
666 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_NULLISH_EQUAL;
667 Iterator().Forward(1);
668 break;
669 }
670 default: {
671 break;
672 }
673 }
674
675 break;
676 }
677 case LEX_CHAR_DOT: {
678 Iterator().Forward(1);
679
680 if (!IsDecimalDigit(Iterator().Peek())) {
681 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_DOT;
682 return;
683 }
684
685 Iterator().Backward(1);
686 break;
687 }
688 default: {
689 break;
690 }
691 }
692 }
693
ScanLessThanPunctuator()694 void Lexer::ScanLessThanPunctuator()
695 {
696 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN;
697
698 switch (Iterator().Peek()) {
699 case LEX_CHAR_LESS_THAN: {
700 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT;
701 Iterator().Forward(1);
702
703 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
704 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT_EQUAL;
705 Iterator().Forward(1);
706 }
707 break;
708 }
709 case LEX_CHAR_EQUALS: {
710 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN_EQUAL;
711 Iterator().Forward(1);
712 break;
713 }
714 default: {
715 break;
716 }
717 }
718 }
719
ScanGreaterThanPunctuator()720 void Lexer::ScanGreaterThanPunctuator()
721 {
722 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN;
723
724 switch (Iterator().Peek()) {
725 case LEX_CHAR_GREATER_THAN: {
726 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT;
727 Iterator().Forward(1);
728
729 switch (Iterator().Peek()) {
730 case LEX_CHAR_GREATER_THAN: {
731 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT;
732 Iterator().Forward(1);
733
734 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
735 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT_EQUAL;
736 Iterator().Forward(1);
737 }
738 break;
739 }
740 case LEX_CHAR_EQUALS: {
741 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT_EQUAL;
742 Iterator().Forward(1);
743 break;
744 }
745 default: {
746 break;
747 }
748 }
749 break;
750 }
751 case LEX_CHAR_EQUALS: {
752 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN_EQUAL;
753 Iterator().Forward(1);
754 break;
755 }
756 default: {
757 break;
758 }
759 }
760 }
761
ScanEqualsPunctuator()762 void Lexer::ScanEqualsPunctuator()
763 {
764 GetToken().type_ = TokenType::PUNCTUATOR_SUBSTITUTION;
765
766 switch (Iterator().Peek()) {
767 case LEX_CHAR_EQUALS: {
768 GetToken().type_ = TokenType::PUNCTUATOR_EQUAL;
769 Iterator().Forward(1);
770
771 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
772 GetToken().type_ = TokenType::PUNCTUATOR_STRICT_EQUAL;
773 Iterator().Forward(1);
774 }
775 break;
776 }
777 case LEX_CHAR_GREATER_THAN: {
778 GetToken().type_ = TokenType::PUNCTUATOR_ARROW;
779 Iterator().Forward(1);
780 break;
781 }
782 default: {
783 break;
784 }
785 }
786 }
787
ScanExclamationPunctuator()788 void Lexer::ScanExclamationPunctuator()
789 {
790 GetToken().type_ = TokenType::PUNCTUATOR_EXCLAMATION_MARK;
791
792 switch (Iterator().Peek()) {
793 case LEX_CHAR_EQUALS: {
794 GetToken().type_ = TokenType::PUNCTUATOR_NOT_EQUAL;
795 Iterator().Forward(1);
796
797 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
798 GetToken().type_ = TokenType::PUNCTUATOR_NOT_STRICT_EQUAL;
799 Iterator().Forward(1);
800 }
801 break;
802 }
803 default: {
804 break;
805 }
806 }
807 }
808
ScanAmpersandPunctuator()809 void Lexer::ScanAmpersandPunctuator()
810 {
811 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND;
812
813 switch (Iterator().Peek()) {
814 case LEX_CHAR_AMPERSAND: {
815 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND;
816 Iterator().Forward(1);
817
818 switch (Iterator().Peek()) {
819 case LEX_CHAR_EQUALS: {
820 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND_EQUAL;
821 Iterator().Forward(1);
822 break;
823 }
824 default: {
825 break;
826 }
827 }
828
829 break;
830 }
831 case LEX_CHAR_EQUALS: {
832 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND_EQUAL;
833 Iterator().Forward(1);
834 break;
835 }
836 default: {
837 break;
838 }
839 }
840 }
841
ScanVLinePunctuator()842 void Lexer::ScanVLinePunctuator()
843 {
844 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR;
845
846 switch (Iterator().Peek()) {
847 case LEX_CHAR_VLINE: {
848 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR;
849 Iterator().Forward(1);
850
851 switch (Iterator().Peek()) {
852 case LEX_CHAR_EQUALS: {
853 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR_EQUAL;
854 Iterator().Forward(1);
855 break;
856 }
857 default: {
858 break;
859 }
860 }
861
862 break;
863 }
864 case LEX_CHAR_EQUALS: {
865 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR_EQUAL;
866 Iterator().Forward(1);
867 break;
868 }
869 default: {
870 break;
871 }
872 }
873 }
874
ScanCircumflexPunctuator()875 void Lexer::ScanCircumflexPunctuator()
876 {
877 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR;
878
879 switch (Iterator().Peek()) {
880 case LEX_CHAR_EQUALS: {
881 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR_EQUAL;
882 Iterator().Forward(1);
883 break;
884 }
885 default: {
886 break;
887 }
888 }
889 }
890
ScanPlusPunctuator()891 void Lexer::ScanPlusPunctuator()
892 {
893 GetToken().type_ = TokenType::PUNCTUATOR_PLUS;
894
895 switch (Iterator().Peek()) {
896 case LEX_CHAR_PLUS: {
897 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_PLUS;
898 Iterator().Forward(1);
899 break;
900 }
901 case LEX_CHAR_EQUALS: {
902 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_EQUAL;
903 Iterator().Forward(1);
904 break;
905 }
906 default: {
907 break;
908 }
909 }
910 }
911
ScanMinusPunctuator()912 void Lexer::ScanMinusPunctuator()
913 {
914 GetToken().type_ = TokenType::PUNCTUATOR_MINUS;
915
916 switch (Iterator().Peek()) {
917 case LEX_CHAR_MINUS: {
918 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_MINUS;
919 Iterator().Forward(1);
920 break;
921 }
922 case LEX_CHAR_EQUALS: {
923 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_EQUAL;
924 Iterator().Forward(1);
925 break;
926 }
927 default: {
928 break;
929 }
930 }
931 }
932
ScanSlashPunctuator()933 void Lexer::ScanSlashPunctuator()
934 {
935 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE;
936
937 switch (Iterator().Peek()) {
938 case LEX_CHAR_EQUALS: {
939 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE_EQUAL;
940 Iterator().Forward(1);
941 break;
942 }
943 default: {
944 break;
945 }
946 }
947 }
948
ScanDotPunctuator()949 void Lexer::ScanDotPunctuator()
950 {
951 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD;
952
953 switch (Iterator().Peek()) {
954 case LEX_CHAR_0:
955 case LEX_CHAR_1:
956 case LEX_CHAR_2:
957 case LEX_CHAR_3:
958 case LEX_CHAR_4:
959 case LEX_CHAR_5:
960 case LEX_CHAR_6:
961 case LEX_CHAR_7:
962 case LEX_CHAR_8:
963 case LEX_CHAR_9: {
964 ScanNumber();
965 break;
966 }
967 case LEX_CHAR_QUESTION: {
968 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_QUESTION;
969 Iterator().Forward(1);
970 break;
971 }
972 case LEX_CHAR_DOT: {
973 Iterator().Forward(1);
974
975 if (Iterator().Peek() == LEX_CHAR_DOT) {
976 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_PERIOD_PERIOD;
977 Iterator().Forward(1);
978 break;
979 }
980
981 Iterator().Backward(1);
982 break;
983 }
984 default: {
985 break;
986 }
987 }
988 }
989
ScanAsterixPunctuator()990 void Lexer::ScanAsterixPunctuator()
991 {
992 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY;
993
994 switch (Iterator().Peek()) {
995 case LEX_CHAR_ASTERISK: {
996 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION;
997 Iterator().Forward(1);
998
999 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
1000 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION_EQUAL;
1001 Iterator().Forward(1);
1002 }
1003 break;
1004 }
1005 case LEX_CHAR_EQUALS: {
1006 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY_EQUAL;
1007 Iterator().Forward(1);
1008 break;
1009 }
1010 default: {
1011 break;
1012 }
1013 }
1014 }
1015
ScanPercentPunctuator()1016 void Lexer::ScanPercentPunctuator()
1017 {
1018 GetToken().type_ = TokenType::PUNCTUATOR_MOD;
1019
1020 switch (Iterator().Peek()) {
1021 case LEX_CHAR_EQUALS: {
1022 GetToken().type_ = TokenType::PUNCTUATOR_MOD_EQUAL;
1023 Iterator().Forward(1);
1024 break;
1025 }
1026 default: {
1027 break;
1028 }
1029 }
1030 }
1031
IsLineTerminatorOrEos() const1032 bool Lexer::IsLineTerminatorOrEos() const
1033 {
1034 switch (Iterator().PeekCp()) {
1035 case util::StringView::Iterator::INVALID_CP:
1036 case LEX_CHAR_LF:
1037 case LEX_CHAR_CR:
1038 case LEX_CHAR_LS:
1039 case LEX_CHAR_PS: {
1040 return true;
1041 }
1042 default: {
1043 break;
1044 }
1045 }
1046
1047 return false;
1048 }
1049
ScanRegExpPattern()1050 void Lexer::ScanRegExpPattern()
1051 {
1052 bool isCharClass = false;
1053 size_t cpSize {};
1054
1055 while (true) {
1056 switch (Iterator().PeekCp(&cpSize)) {
1057 case util::StringView::Iterator::INVALID_CP:
1058 case LEX_CHAR_LF:
1059 case LEX_CHAR_CR:
1060 case LEX_CHAR_LS:
1061 case LEX_CHAR_PS: {
1062 ThrowError("Unterminated RegExp");
1063 break;
1064 }
1065 case LEX_CHAR_SLASH: {
1066 if (!isCharClass) {
1067 return;
1068 }
1069
1070 break;
1071 }
1072 case LEX_CHAR_LEFT_SQUARE: {
1073 isCharClass = true;
1074 break;
1075 }
1076 case LEX_CHAR_RIGHT_SQUARE: {
1077 isCharClass = false;
1078 break;
1079 }
1080 case LEX_CHAR_BACKSLASH: {
1081 Iterator().Forward(1);
1082
1083 if (IsLineTerminatorOrEos()) {
1084 continue;
1085 }
1086
1087 Iterator().PeekCp(&cpSize);
1088 Iterator().Forward(cpSize);
1089 continue;
1090 }
1091 default: {
1092 break;
1093 }
1094 }
1095
1096 Iterator().Forward(cpSize);
1097 }
1098 }
1099
GetRegExpFlag(char32_t cp,RegExpFlags & flag)1100 bool Lexer::GetRegExpFlag(char32_t cp, RegExpFlags &flag)
1101 {
1102 switch (cp) {
1103 case LEX_CHAR_LOWERCASE_G: {
1104 flag = RegExpFlags::GLOBAL;
1105 break;
1106 }
1107 case LEX_CHAR_LOWERCASE_I: {
1108 flag = RegExpFlags::IGNORE_CASE;
1109 break;
1110 }
1111 case LEX_CHAR_LOWERCASE_M: {
1112 flag = RegExpFlags::MULTILINE;
1113 break;
1114 }
1115 case LEX_CHAR_LOWERCASE_S: {
1116 flag = RegExpFlags::DOTALL;
1117 break;
1118 }
1119 case LEX_CHAR_LOWERCASE_U: {
1120 flag = RegExpFlags::UNICODE;
1121 break;
1122 }
1123 case LEX_CHAR_LOWERCASE_Y: {
1124 flag = RegExpFlags::STICKY;
1125 break;
1126 }
1127 case LEX_CHAR_LOWERCASE_D: {
1128 flag = RegExpFlags::HAS_INDICES;
1129 break;
1130 }
1131 default: {
1132 return false;
1133 }
1134 }
1135 return true;
1136 }
1137
ScanRegExpFlags()1138 RegExpFlags Lexer::ScanRegExpFlags()
1139 {
1140 RegExpFlags resultFlags = RegExpFlags::EMPTY;
1141
1142 while (true) {
1143 size_t cpSize {};
1144 auto cp = Iterator().PeekCp(&cpSize);
1145 if (!KeywordsUtil::IsIdentifierPart(cp)) {
1146 break;
1147 }
1148
1149 Iterator().Forward(cpSize);
1150
1151 RegExpFlags flag = RegExpFlags::EMPTY;
1152
1153 if (!GetRegExpFlag(cp, flag)) {
1154 if (cp == LEX_CHAR_SP) {
1155 return resultFlags;
1156 } else {
1157 ThrowError("Invalid RegExp flag");
1158 }
1159 }
1160
1161 if (flag == RegExpFlags::EMPTY || (resultFlags & flag) != 0) {
1162 ThrowError("Invalid RegExp flag");
1163 }
1164
1165 resultFlags = resultFlags | flag;
1166 }
1167
1168 return resultFlags;
1169 }
1170
ScanRegExp()1171 RegExp Lexer::ScanRegExp()
1172 {
1173 GetToken().type_ = TokenType::LITERAL_REGEXP;
1174
1175 const auto patternStart = Iterator().Index();
1176 ScanRegExpPattern();
1177 const auto pattern = SourceView(patternStart, Iterator().Index());
1178
1179 ASSERT(Iterator().Peek() == LEX_CHAR_SLASH);
1180 Iterator().Forward(1);
1181
1182 const auto flagsStart = Iterator().Index();
1183 RegExpFlags resultFlags = ScanRegExpFlags();
1184 const auto flags = SourceView(flagsStart, Iterator().Index());
1185
1186 SkipWhiteSpaces();
1187 SetTokenEnd();
1188
1189 return {pattern, flags, resultFlags};
1190 }
1191
CheckArrow()1192 bool Lexer::CheckArrow()
1193 {
1194 if (Iterator().Peek() != LEX_CHAR_EQUALS) {
1195 return false;
1196 }
1197 Iterator().Forward(1);
1198
1199 bool res = Iterator().Peek() == LEX_CHAR_GREATER_THAN;
1200 Iterator().Backward(1);
1201
1202 return res;
1203 }
1204
SetTokenStart()1205 void Lexer::SetTokenStart()
1206 {
1207 if (pos_.nextTokenLine != 0) {
1208 pos_.line += pos_.nextTokenLine;
1209 pos_.nextTokenLine = 0;
1210 GetToken().flags_ = TokenFlags::NEW_LINE;
1211 } else {
1212 GetToken().flags_ = TokenFlags::NONE;
1213 }
1214
1215 pos_.token.loc_.start = SourcePosition {Iterator().Index(), pos_.line};
1216 }
1217
SetTokenEnd()1218 void Lexer::SetTokenEnd()
1219 {
1220 pos_.token.loc_.end = SourcePosition {Iterator().Index(), pos_.line};
1221 }
1222
CheckAwaitKeyword()1223 void Lexer::CheckAwaitKeyword()
1224 {
1225 if (parserContext_->IsStaticBlock()) {
1226 ThrowError("'await' is not allowed in class static block");
1227 }
1228 // support top level await for module
1229 if (!parserContext_->IsAsync()) {
1230 if (!parserContext_->IsModule() || parserContext_->GetProgram()->IsDtsFile()) {
1231 GetToken().type_ = TokenType::LITERAL_IDENT;
1232 return;
1233 }
1234 if (parserContext_->GetProgram()->Extension() == ScriptExtension::TS) {
1235 if (parserContext_->IsTsModule()) {
1236 GetToken().type_ = TokenType::LITERAL_IDENT;
1237 return;
1238 }
1239 }
1240 }
1241
1242 if (parserContext_->DisallowAwait()) {
1243 ThrowError("'await' is not allowed");
1244 }
1245 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1246 ThrowError("Keyword must not contain escaped characters");
1247 }
1248 GetToken().type_ = TokenType::KEYW_AWAIT;
1249 }
1250
CheckArgumentsKeyword()1251 void Lexer::CheckArgumentsKeyword()
1252 {
1253 if (parserContext_->DisallowArguments()) {
1254 ThrowError("'arguments' is not allowed in static block and field initializer");
1255 }
1256 }
1257
CheckKeywordEscape(TokenType type)1258 void Lexer::CheckKeywordEscape(TokenType type)
1259 {
1260 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1261 ThrowError("Escape sequences are not allowed in keywords");
1262 }
1263
1264 GetToken().type_ = type;
1265 }
1266
CheckEnumKeyword()1267 void Lexer::CheckEnumKeyword()
1268 {
1269 if (parserContext_->GetProgram()->Extension() == ScriptExtension::JS) {
1270 ThrowError("Unexpected reserved keyword");
1271 }
1272
1273 if (GetToken().flags_ & TokenFlags::HAS_ESCAPE) {
1274 ThrowError("Escape sequences are not allowed in keywords");
1275 }
1276
1277 GetToken().type_ = TokenType::LITERAL_IDENT;
1278 }
1279
CheckLetKeyword()1280 void Lexer::CheckLetKeyword()
1281 {
1282 GetToken().type_ = TokenType::KEYW_LET;
1283 }
1284
CheckYieldKeyword()1285 void Lexer::CheckYieldKeyword()
1286 {
1287 if (!parserContext_->AllowYield()) {
1288 ThrowError("'yield' is not allowed");
1289 }
1290
1291 GetToken().type_ = TokenType::KEYW_YIELD;
1292 }
1293
CheckFutureReservedKeyword(TokenType keywordType)1294 void Lexer::CheckFutureReservedKeyword(TokenType keywordType)
1295 {
1296 GetToken().type_ = TokenType::LITERAL_IDENT;
1297
1298 if (parserContext_->GetProgram()->Extension() == ScriptExtension::TS && keywordType <= TokenType::KEYW_INTERFACE) {
1299 return;
1300 }
1301
1302 ThrowError("Unexpected strict mode reserved keyword");
1303 }
1304
SkipWhiteSpaces()1305 void Lexer::SkipWhiteSpaces()
1306 {
1307 while (true) {
1308 auto cp = Iterator().Peek();
1309
1310 switch (cp) {
1311 case LEX_CHAR_HASH_MARK: {
1312 Iterator().Forward(1);
1313 cp = Iterator().Peek();
1314 if (cp != LEX_CHAR_EXCLAMATION) {
1315 Iterator().Backward(1);
1316 return;
1317 }
1318 if (Iterator().Index() != 1) {
1319 /*
1320 * according to ECMA-262 specification item 12.5 Hashbang Comments are location-sensitive.
1321 * only allowed occurs at the beginning of files, other position is illegal.
1322 */
1323 Iterator().Backward(1);
1324 ThrowError("Invalid or unexpected token");
1325 }
1326
1327 Iterator().Forward(1);
1328 SkipSingleLineComment();
1329 continue;
1330 }
1331 case LEX_CHAR_CR: {
1332 Iterator().Forward(1);
1333
1334 if (Iterator().Peek() != LEX_CHAR_LF) {
1335 Iterator().Backward(1);
1336 }
1337
1338 [[fallthrough]];
1339 }
1340 case LEX_CHAR_LF: {
1341 Iterator().Forward(1);
1342 pos_.nextTokenLine++;
1343 continue;
1344 }
1345 case LEX_CHAR_VT:
1346 case LEX_CHAR_FF:
1347 case LEX_CHAR_SP:
1348 case LEX_CHAR_TAB: {
1349 Iterator().Forward(1);
1350 continue;
1351 }
1352 case LEX_CHAR_SLASH: {
1353 Iterator().Forward(1);
1354 cp = Iterator().Peek();
1355 if (cp == LEX_CHAR_SLASH) {
1356 Iterator().Forward(1);
1357 SkipSingleLineComment();
1358 continue;
1359 }
1360 if (cp == LEX_CHAR_ASTERISK) {
1361 Iterator().Forward(1);
1362 SkipMultiLineComment();
1363 continue;
1364 }
1365
1366 Iterator().Backward(1);
1367 return;
1368 }
1369 default: {
1370 if (cp < LEX_ASCII_MAX_BITS) {
1371 return;
1372 }
1373
1374 size_t cpSize {};
1375 cp = Iterator().PeekCp(&cpSize);
1376
1377 switch (cp) {
1378 case LEX_CHAR_LS:
1379 case LEX_CHAR_PS: {
1380 pos_.nextTokenLine++;
1381 [[fallthrough]];
1382 }
1383 case LEX_CHAR_NBSP:
1384 case LEX_CHAR_NLINE:
1385 case LEX_CHAR_IGSP:
1386 case LEX_CHAR_ZWNBSP: {
1387 Iterator().Forward(cpSize);
1388 continue;
1389 }
1390 default: {
1391 return;
1392 }
1393 }
1394 }
1395 }
1396 }
1397 }
1398
1399 // NOLINTNEXTLINE(readability-function-size)
NextToken(LexerNextTokenFlags flags)1400 void Lexer::NextToken(LexerNextTokenFlags flags)
1401 {
1402 Keywords kws(this, flags);
1403 KeywordsUtil &kwu = kws.Util();
1404
1405 SetTokenStart();
1406
1407 auto cp = Iterator().Peek();
1408 Iterator().Forward(1);
1409
1410 GetToken().keywordType_ = TokenType::EOS;
1411
1412 switch (cp) {
1413 case LEX_CHAR_EXCLAMATION: {
1414 ScanExclamationPunctuator();
1415 break;
1416 }
1417 case LEX_CHAR_SINGLE_QUOTE: {
1418 ScanString<LEX_CHAR_SINGLE_QUOTE>();
1419 break;
1420 }
1421 case LEX_CHAR_DOUBLE_QUOTE: {
1422 ScanString<LEX_CHAR_DOUBLE_QUOTE>();
1423 break;
1424 }
1425 case LEX_CHAR_HASH_MARK: {
1426 GetToken().type_ = TokenType::PUNCTUATOR_HASH_MARK;
1427 break;
1428 }
1429 case LEX_CHAR_PERCENT: {
1430 ScanPercentPunctuator();
1431 break;
1432 }
1433 case LEX_CHAR_AMPERSAND: {
1434 ScanAmpersandPunctuator();
1435 break;
1436 }
1437 case LEX_CHAR_LEFT_PAREN: {
1438 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_PARENTHESIS;
1439 break;
1440 }
1441 case LEX_CHAR_RIGHT_PAREN: {
1442 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_PARENTHESIS;
1443 break;
1444 }
1445 case LEX_CHAR_ASTERISK: {
1446 ScanAsterixPunctuator();
1447 break;
1448 }
1449 case LEX_CHAR_PLUS: {
1450 ScanPlusPunctuator();
1451 break;
1452 }
1453 case LEX_CHAR_COMMA: {
1454 GetToken().type_ = TokenType::PUNCTUATOR_COMMA;
1455 break;
1456 }
1457 case LEX_CHAR_MINUS: {
1458 ScanMinusPunctuator();
1459 break;
1460 }
1461 case LEX_CHAR_DOT: {
1462 ScanDotPunctuator();
1463 break;
1464 }
1465 case LEX_CHAR_SLASH: {
1466 ScanSlashPunctuator();
1467 break;
1468 }
1469 case LEX_CHAR_0: {
1470 ScanNumberLeadingZero();
1471 break;
1472 }
1473 case LEX_CHAR_1:
1474 case LEX_CHAR_2:
1475 case LEX_CHAR_3:
1476 case LEX_CHAR_4:
1477 case LEX_CHAR_5:
1478 case LEX_CHAR_6:
1479 case LEX_CHAR_7:
1480 case LEX_CHAR_8:
1481 case LEX_CHAR_9: {
1482 ScanNumber();
1483 break;
1484 }
1485 case LEX_CHAR_COLON: {
1486 GetToken().type_ = TokenType::PUNCTUATOR_COLON;
1487 break;
1488 }
1489 case LEX_CHAR_SEMICOLON: {
1490 GetToken().type_ = TokenType::PUNCTUATOR_SEMI_COLON;
1491 break;
1492 }
1493 case LEX_CHAR_LESS_THAN: {
1494 ScanLessThanPunctuator();
1495 break;
1496 }
1497 case LEX_CHAR_EQUALS: {
1498 ScanEqualsPunctuator();
1499 break;
1500 }
1501 case LEX_CHAR_GREATER_THAN: {
1502 ScanGreaterThanPunctuator();
1503 break;
1504 }
1505 case LEX_CHAR_QUESTION: {
1506 ScanQuestionPunctuator();
1507 break;
1508 }
1509 case LEX_CHAR_AT: {
1510 GetToken().type_ = TokenType::PUNCTUATOR_AT;
1511 break;
1512 }
1513 case LEX_CHAR_DOLLAR_SIGN:
1514 case LEX_CHAR_UPPERCASE_A:
1515 case LEX_CHAR_UPPERCASE_B:
1516 case LEX_CHAR_UPPERCASE_C:
1517 case LEX_CHAR_UPPERCASE_D:
1518 case LEX_CHAR_UPPERCASE_E:
1519 case LEX_CHAR_UPPERCASE_F:
1520 case LEX_CHAR_UPPERCASE_G:
1521 case LEX_CHAR_UPPERCASE_H:
1522 case LEX_CHAR_UPPERCASE_I:
1523 case LEX_CHAR_UPPERCASE_J:
1524 case LEX_CHAR_UPPERCASE_K:
1525 case LEX_CHAR_UPPERCASE_L:
1526 case LEX_CHAR_UPPERCASE_M:
1527 case LEX_CHAR_UPPERCASE_N:
1528 case LEX_CHAR_UPPERCASE_O:
1529 case LEX_CHAR_UPPERCASE_P:
1530 case LEX_CHAR_UPPERCASE_Q:
1531 case LEX_CHAR_UPPERCASE_R:
1532 case LEX_CHAR_UPPERCASE_S:
1533 case LEX_CHAR_UPPERCASE_T:
1534 case LEX_CHAR_UPPERCASE_U:
1535 case LEX_CHAR_UPPERCASE_V:
1536 case LEX_CHAR_UPPERCASE_W:
1537 case LEX_CHAR_UPPERCASE_X:
1538 case LEX_CHAR_UPPERCASE_Y:
1539 case LEX_CHAR_UPPERCASE_Z:
1540 case LEX_CHAR_UNDERSCORE:
1541 case LEX_CHAR_LOWERCASE_H:
1542 case LEX_CHAR_LOWERCASE_J:
1543 case LEX_CHAR_LOWERCASE_Q:
1544 case LEX_CHAR_LOWERCASE_X:
1545 case LEX_CHAR_LOWERCASE_Z: {
1546 kwu.ScanIdContinue();
1547 break;
1548 }
1549 case LEX_CHAR_LEFT_SQUARE: {
1550 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SQUARE_BRACKET;
1551 break;
1552 }
1553 case LEX_CHAR_BACKSLASH: {
1554 GetToken().flags_ |= TokenFlags::HAS_ESCAPE;
1555
1556 if (Iterator().Peek() != LEX_CHAR_LOWERCASE_U) {
1557 ThrowError("Invalid character");
1558 }
1559
1560 cp = ScanUnicodeEscapeSequence();
1561
1562 kwu.ScanIdentifierStart(cp);
1563 break;
1564 }
1565 case LEX_CHAR_RIGHT_SQUARE: {
1566 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SQUARE_BRACKET;
1567 break;
1568 }
1569 case LEX_CHAR_CIRCUMFLEX: {
1570 ScanCircumflexPunctuator();
1571 break;
1572 }
1573 case LEX_CHAR_BACK_TICK: {
1574 GetToken().type_ = TokenType::PUNCTUATOR_BACK_TICK;
1575 SetTokenEnd();
1576 return;
1577 }
1578 case LEX_CHAR_LOWERCASE_A: {
1579 kws.ScanA();
1580 break;
1581 }
1582 case LEX_CHAR_LOWERCASE_B: {
1583 kws.ScanB();
1584 break;
1585 }
1586 case LEX_CHAR_LOWERCASE_C: {
1587 kws.ScanC();
1588 break;
1589 }
1590 case LEX_CHAR_LOWERCASE_D: {
1591 kws.ScanD();
1592 break;
1593 }
1594 case LEX_CHAR_LOWERCASE_E: {
1595 kws.ScanE();
1596 break;
1597 }
1598 case LEX_CHAR_LOWERCASE_F: {
1599 kws.ScanF();
1600 break;
1601 }
1602 case LEX_CHAR_LOWERCASE_G: {
1603 kws.ScanG();
1604 break;
1605 }
1606 case LEX_CHAR_LOWERCASE_I: {
1607 kws.ScanI();
1608 break;
1609 }
1610 case LEX_CHAR_LOWERCASE_K: {
1611 kws.ScanK();
1612 break;
1613 }
1614 case LEX_CHAR_LOWERCASE_L: {
1615 kws.ScanL();
1616 break;
1617 }
1618 case LEX_CHAR_LOWERCASE_M: {
1619 kws.ScanM();
1620 break;
1621 }
1622 case LEX_CHAR_LOWERCASE_N: {
1623 kws.ScanN();
1624 break;
1625 }
1626 case LEX_CHAR_LOWERCASE_O: {
1627 kws.ScanO();
1628 break;
1629 }
1630 case LEX_CHAR_LOWERCASE_P: {
1631 kws.ScanP();
1632 break;
1633 }
1634 case LEX_CHAR_LOWERCASE_R: {
1635 kws.ScanR();
1636 break;
1637 }
1638 case LEX_CHAR_LOWERCASE_S: {
1639 kws.ScanS();
1640 break;
1641 }
1642 case LEX_CHAR_LOWERCASE_T: {
1643 kws.ScanT();
1644 break;
1645 }
1646 case LEX_CHAR_LOWERCASE_U: {
1647 kws.ScanU();
1648 break;
1649 }
1650 case LEX_CHAR_LOWERCASE_V: {
1651 kws.ScanV();
1652 break;
1653 }
1654 case LEX_CHAR_LOWERCASE_W: {
1655 kws.ScanW();
1656 break;
1657 }
1658 case LEX_CHAR_LOWERCASE_Y: {
1659 kws.ScanY();
1660 break;
1661 }
1662 case LEX_CHAR_LEFT_BRACE: {
1663 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_BRACE;
1664
1665 if (tlCtx_) {
1666 tlCtx_->ConsumeLeftBrace();
1667 }
1668
1669 break;
1670 }
1671 case LEX_CHAR_VLINE: {
1672 ScanVLinePunctuator();
1673 break;
1674 }
1675 case LEX_CHAR_RIGHT_BRACE: {
1676 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_BRACE;
1677
1678 if (tlCtx_ && tlCtx_->ConsumeRightBrace()) {
1679 SetTokenEnd();
1680 return;
1681 }
1682
1683 break;
1684 }
1685 case LEX_CHAR_TILDE: {
1686 GetToken().type_ = TokenType::PUNCTUATOR_TILDE;
1687 break;
1688 }
1689 default: {
1690 Iterator().Backward(1);
1691
1692 if (cp == util::StringView::Iterator::INVALID_CP) {
1693 GetToken().type_ = TokenType::EOS;
1694 break;
1695 }
1696
1697 cp = Iterator().Next();
1698 kwu.ScanIdentifierStart(cp);
1699 break;
1700 }
1701 }
1702
1703 SetTokenEnd();
1704 SkipWhiteSpaces();
1705 }
1706
AssignTokenEscapeError()1707 void Lexer::AssignTokenEscapeError()
1708 {
1709 GetToken().flags_ |= TokenFlags::ESCAPE_ERROR;
1710 }
1711
AssignTokenTaggedTemplate()1712 void Lexer::AssignTokenTaggedTemplate()
1713 {
1714 GetToken().flags_ |= TokenFlags::TAGGED_TEMPLATE;
1715 }
1716
CheckTokenIsTaggedTemplate() const1717 bool Lexer::CheckTokenIsTaggedTemplate() const
1718 {
1719 return GetToken().IsTaggedTemplate();
1720 }
1721
1722 } // namespace panda::es2panda::lexer
1723