1 /**
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "lexer.h"
17
18 #include "generated/keywords.h"
19
20 namespace ark::es2panda::lexer {
LexerPosition(const util::StringView & source)21 LexerPosition::LexerPosition(const util::StringView &source) : iterator_(source) {}
22
Lexer(const parser::ParserContext * parserContext,util::ErrorLogger * errorLogger,bool startLexer)23 Lexer::Lexer(const parser::ParserContext *parserContext, util::ErrorLogger *errorLogger, bool startLexer)
24 : allocator_(parserContext->GetProgram()->Allocator()),
25 parserContext_(parserContext),
26 source_(parserContext->GetProgram()->SourceCode()),
27 pos_(source_),
28 errorLogger_(errorLogger)
29 {
30 if (startLexer) {
31 SkipWhiteSpaces();
32 }
33 }
34
ScanUnicodeEscapeSequence()35 char32_t Lexer::ScanUnicodeEscapeSequence()
36 {
37 ASSERT(Iterator().Peek() == LEX_CHAR_LOWERCASE_U);
38 auto constexpr UNICODE_ESCAPE_SEQUENCE_LENGTH = 4;
39
40 Iterator().Forward(1);
41
42 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
43 Iterator().Forward(1);
44 return ScanUnicodeCodePointEscape();
45 }
46
47 if (parserContext_->GetProgram()->Extension() == ScriptExtension::AS) {
48 return ScanHexEscape<UNICODE_ESCAPE_SEQUENCE_LENGTH, true>();
49 }
50
51 return ScanHexEscape<UNICODE_ESCAPE_SEQUENCE_LENGTH>();
52 }
53
54 // '\u{...}' escape sequence should have at least one hex digit inside brackets!
ScanUnicodeCodePointEscape()55 char32_t Lexer::ScanUnicodeCodePointEscape()
56 {
57 char32_t code = 0;
58 char32_t cp = Iterator().Peek();
59 if (!IsHexDigit(cp)) {
60 LogSyntaxError("Hexadecimal digit expected.");
61 code = UNICODE_INVALID_CP;
62 }
63
64 while (true) {
65 Iterator().Forward(1);
66
67 constexpr auto MULTIPLIER = 16;
68 code = code * MULTIPLIER + HexValue(cp);
69 if (code > UNICODE_CODE_POINT_MAX) {
70 LogSyntaxError("Invalid unicode escape sequence");
71 code = UNICODE_INVALID_CP;
72 break;
73 }
74
75 cp = Iterator().Peek();
76 if (!IsHexDigit(cp)) {
77 break;
78 }
79 }
80
81 if (cp != LEX_CHAR_RIGHT_BRACE) {
82 LogSyntaxError("Invalid unicode escape sequence");
83 code = UNICODE_INVALID_CP;
84 }
85
86 Iterator().Forward(1);
87 return code;
88 }
89
Allocator()90 ArenaAllocator *Lexer::Allocator()
91 {
92 return allocator_;
93 }
94
GetToken()95 Token &Lexer::GetToken()
96 {
97 return pos_.token_;
98 }
99
GetToken() const100 const Token &Lexer::GetToken() const
101 {
102 return pos_.token_;
103 }
104
Line() const105 size_t Lexer::Line() const
106 {
107 return pos_.line_;
108 }
109
Save() const110 LexerPosition Lexer::Save() const
111 {
112 return pos_;
113 }
114
BackwardToken(TokenType type,size_t offset)115 void Lexer::BackwardToken(TokenType type, size_t offset)
116 {
117 pos_.token_.type_ = type;
118 pos_.iterator_.Reset(GetToken().End().index - offset);
119 pos_.nextTokenLine_ = 0;
120 }
121
ForwardToken(TokenType type,size_t offset)122 void Lexer::ForwardToken(TokenType type, size_t offset)
123 {
124 pos_.token_.type_ = type;
125 pos_.iterator_.Forward(offset);
126 SkipWhiteSpaces();
127 }
128
Rewind(const LexerPosition & pos)129 void Lexer::Rewind(const LexerPosition &pos)
130 {
131 pos_ = pos;
132 }
133
Lookahead()134 char32_t Lexer::Lookahead()
135 {
136 return Iterator().Peek();
137 }
138
// Iterator-based convenience overload: forwards the two iterator indices to the
// index-based SourceView().
util::StringView Lexer::SourceView(const util::StringView::Iterator &begin, const util::StringView::Iterator &end) const
{
    const auto beginIndex = begin.Index();
    const auto endIndex = end.Index();
    return SourceView(beginIndex, endIndex);
}
143
SourceView(size_t begin,size_t end) const144 util::StringView Lexer::SourceView(size_t begin, size_t end) const
145 {
146 return source_.Substr(begin, end);
147 }
148
SkipMultiLineComment()149 void Lexer::SkipMultiLineComment()
150 {
151 while (true) {
152 switch (Iterator().Next()) {
153 case util::StringView::Iterator::INVALID_CP: {
154 LogSyntaxError("Unterminated multi-line comment");
155 return;
156 }
157 case LEX_CHAR_LF:
158 case LEX_CHAR_CR:
159 case LEX_CHAR_LS:
160 case LEX_CHAR_PS: {
161 pos_.nextTokenLine_++;
162 continue;
163 }
164 case LEX_CHAR_ASTERISK: {
165 if (Iterator().Peek() == LEX_CHAR_SLASH) {
166 Iterator().Forward(1);
167 return;
168 }
169
170 break;
171 }
172 default: {
173 break;
174 }
175 }
176 }
177 }
178
179 /* New line character is not processed */
SkipSingleLineComment()180 void Lexer::SkipSingleLineComment()
181 {
182 while (true) {
183 switch (Iterator().Next()) {
184 case util::StringView::Iterator::INVALID_CP:
185 case LEX_CHAR_CR: {
186 if (Iterator().Peek() == LEX_CHAR_LF) {
187 Iterator().Forward(1);
188 }
189
190 [[fallthrough]];
191 }
192 case LEX_CHAR_LF:
193 case LEX_CHAR_LS:
194 case LEX_CHAR_PS: {
195 pos_.nextTokenLine_++;
196 return;
197 }
198 default: {
199 break;
200 }
201 }
202 }
203 }
204
LogSyntaxError(std::string_view const errorMessage) const205 void Lexer::LogSyntaxError(std::string_view const errorMessage) const
206 {
207 lexer::LineIndex index(source_);
208 lexer::SourceLocation loc = index.GetLocation(SourcePosition(Iterator().Index(), pos_.line_));
209 errorLogger_->WriteLog(Error {ErrorType::SYNTAX, parserContext_->GetProgram()->SourceFilePath().Utf8(),
210 errorMessage, loc.line, loc.col});
211 }
212
LogUnexpectedToken(lexer::TokenType const tokenType) const213 void Lexer::LogUnexpectedToken(lexer::TokenType const tokenType) const
214 {
215 std::stringstream ss;
216 ss << "Unexpected token: '" << TokenToString(tokenType) << "'.";
217 LogSyntaxError(ss.str());
218 }
219
CheckNumberLiteralEnd()220 void Lexer::CheckNumberLiteralEnd()
221 {
222 if (Iterator().Peek() == LEX_CHAR_LOWERCASE_N) {
223 GetToken().flags_ |= TokenFlags::NUMBER_BIGINT;
224 GetToken().src_ = SourceView(GetToken().Start().index, Iterator().Index());
225 Iterator().Forward(1);
226 } else {
227 GetToken().src_ = SourceView(GetToken().Start().index, Iterator().Index());
228 }
229
230 const auto nextCp = Iterator().PeekCp();
231 if (KeywordsUtil::IsIdentifierStart(nextCp) || IsDecimalDigit(nextCp)) {
232 LogSyntaxError("Invalid numeric literal");
233 }
234 }
235
ScanDecimalNumbers()236 void Lexer::ScanDecimalNumbers()
237 {
238 bool allowNumericOnNext = true;
239
240 while (true) {
241 switch (Iterator().Peek()) {
242 case LEX_CHAR_0:
243 case LEX_CHAR_1:
244 case LEX_CHAR_2:
245 case LEX_CHAR_3:
246 case LEX_CHAR_4:
247 case LEX_CHAR_5:
248 case LEX_CHAR_6:
249 case LEX_CHAR_7:
250 case LEX_CHAR_8:
251 case LEX_CHAR_9: {
252 Iterator().Forward(1);
253 allowNumericOnNext = true;
254 break;
255 }
256 case LEX_CHAR_UNDERSCORE: {
257 Iterator().Backward(1);
258
259 if (Iterator().Peek() == LEX_CHAR_DOT || !allowNumericOnNext) {
260 Iterator().Forward(1);
261 LogSyntaxError("Invalid numeric separator");
262 }
263
264 GetToken().flags_ |= TokenFlags::NUMBER_HAS_UNDERSCORE;
265 Iterator().Forward(2U);
266 allowNumericOnNext = false;
267 break;
268 }
269 default: {
270 if (!allowNumericOnNext) {
271 LogSyntaxError("Numeric separators are not allowed at the end of numeric literals");
272 }
273 return;
274 }
275 }
276 }
277 }
278
ConvertNumber(NumberFlags flags)279 void Lexer::ConvertNumber([[maybe_unused]] NumberFlags flags)
280 {
281 ConversionResult res;
282 const long double temp = StrToNumeric(&std::strtold, GetToken().src_.Utf8().data(), res);
283 if (res == ConversionResult::SUCCESS) {
284 GetToken().number_ = Number(GetToken().src_, static_cast<double>(temp));
285 } else if (res == ConversionResult::INVALID_ARGUMENT) {
286 LogSyntaxError("Invalid number");
287 } else if (res == ConversionResult::OUT_OF_RANGE) {
288 GetToken().number_ = Number(GetToken().src_, std::numeric_limits<double>::infinity());
289 }
290 }
291
ScanNumber(bool const leadingMinus,bool allowBigInt)292 void Lexer::ScanNumber(bool const leadingMinus, bool allowBigInt)
293 {
294 const bool isPeriod = GetToken().type_ == TokenType::PUNCTUATOR_PERIOD;
295 GetToken().type_ = TokenType::LITERAL_NUMBER;
296 GetToken().keywordType_ = TokenType::LITERAL_NUMBER;
297
298 if (!isPeriod) {
299 ScanDecimalNumbers();
300 }
301
302 bool parseExponent = true;
303 auto flags = NumberFlags::NONE;
304
305 if (Iterator().Peek() == LEX_CHAR_DOT || isPeriod) {
306 flags |= NumberFlags::DECIMAL_POINT;
307 allowBigInt = false;
308 if (!isPeriod) {
309 Iterator().Forward(1);
310 }
311
312 auto cp = Iterator().Peek();
313 if (IsDecimalDigit(cp) || cp == LEX_CHAR_LOWERCASE_E || cp == LEX_CHAR_UPPERCASE_E) {
314 ScanDecimalNumbers();
315 } else {
316 parseExponent = false;
317 }
318 }
319
320 auto const signPosition = ScanCharLex(parseExponent, allowBigInt, flags);
321
322 CheckNumberLiteralEnd();
323
324 if ((GetToken().flags_ & TokenFlags::NUMBER_BIGINT) != 0) {
325 if (!allowBigInt) {
326 LogSyntaxError("Invalid BigInt number");
327 }
328
329 return;
330 }
331
332 util::StringView sv = SourceView(GetToken().Start().index, Iterator().Index());
333 std::string utf8 = !leadingMinus ? std::string {sv.Utf8()} : '-' + std::string {sv.Utf8()};
334 bool needConversion = leadingMinus;
335
336 if (signPosition) {
337 utf8.insert(*signPosition + (!leadingMinus ? 0U : 1U), 1U, '+');
338 needConversion = true;
339 }
340
341 if ((GetToken().flags_ & TokenFlags::NUMBER_HAS_UNDERSCORE) != 0U) {
342 utf8.erase(std::remove(utf8.begin(), utf8.end(), LEX_CHAR_UNDERSCORE), utf8.end());
343 needConversion = true;
344 }
345
346 GetToken().src_ = needConversion ? util::UString(utf8, Allocator()).View() : sv;
347
348 ConvertNumber(flags);
349 }
350
ScanCharLex(bool const parseExponent,bool & allowBigInt,NumberFlags & flags)351 std::optional<std::size_t> Lexer::ScanCharLex(bool const parseExponent, bool &allowBigInt, NumberFlags &flags)
352 {
353 std::optional<std::size_t> rc {};
354
355 if (auto const ch = Iterator().Peek(); ch == LEX_CHAR_LOWERCASE_E || ch == LEX_CHAR_UPPERCASE_E) {
356 allowBigInt = false;
357
358 if (parseExponent) {
359 flags |= NumberFlags::EXPONENT;
360
361 Iterator().Forward(1);
362
363 rc = ScanSignOfNumber();
364
365 if (!IsDecimalDigit(Iterator().Peek())) {
366 LogSyntaxError("Invalid numeric literal");
367 }
368 ScanDecimalNumbers();
369 }
370 }
371
372 return rc;
373 }
374
ScanSignOfNumber()375 std::optional<std::size_t> Lexer::ScanSignOfNumber() noexcept
376 {
377 switch (Iterator().Peek()) {
378 case LEX_CHAR_UNDERSCORE: {
379 break;
380 }
381 case LEX_CHAR_PLUS:
382 case LEX_CHAR_MINUS: {
383 Iterator().Forward(1);
384 break;
385 }
386 default: {
387 return std::make_optional(Iterator().Index() - GetToken().Start().index);
388 }
389 }
390 return std::nullopt;
391 }
392
PushTemplateContext(TemplateLiteralParserContext * ctx)393 void Lexer::PushTemplateContext(TemplateLiteralParserContext *ctx)
394 {
395 tlCtx_ = ctx;
396 }
397
ScanTemplateStringEnd()398 void Lexer::ScanTemplateStringEnd()
399 {
400 if (Iterator().Peek() == LEX_CHAR_BACK_TICK) {
401 Iterator().Forward(1);
402 SetTokenEnd();
403 SkipWhiteSpaces();
404 } else {
405 LogSyntaxError("Unexpected token, expected '`'");
406 }
407 }
408
CheckOctalDigit(char32_t const nextCp)409 bool Lexer::CheckOctalDigit(char32_t const nextCp)
410 {
411 if (IsOctalDigit(nextCp)) {
412 Iterator().Forward(1);
413
414 if (Iterator().Peek() != LEX_CHAR_BACK_TICK) {
415 LogSyntaxError("Octal escape sequences are not allowed in template strings");
416 return false;
417 }
418
419 Iterator().Backward(1);
420 }
421 return true;
422 }
423
ScanTemplateStringCpHelper(char32_t cp,LexerTemplateString templateStr)424 std::tuple<bool, bool, LexerTemplateString> Lexer::ScanTemplateStringCpHelper(char32_t cp,
425 LexerTemplateString templateStr)
426 {
427 switch (cp) {
428 case util::StringView::Iterator::INVALID_CP:
429 LogSyntaxError("Unexpected token, expected '${' or '`'");
430 return {true, false, templateStr};
431 case LEX_CHAR_BACK_TICK:
432 templateStr.end = Iterator().Index();
433 return {true, false, templateStr};
434 case LEX_CHAR_CR: {
435 Iterator().Forward(1);
436
437 if (Iterator().Peek() != LEX_CHAR_LF) {
438 Iterator().Backward(1);
439 }
440
441 [[fallthrough]];
442 }
443 case LEX_CHAR_LF:
444 pos_.line_++;
445 templateStr.str.Append(LEX_CHAR_LF);
446 Iterator().Forward(1);
447 return {false, true, templateStr};
448 case LEX_CHAR_BACKSLASH: {
449 Iterator().Forward(1);
450
451 char32_t nextCp = Iterator().Peek();
452 templateStr.validSequence = CheckOctalDigit(nextCp);
453
454 if (nextCp == LEX_CHAR_BACK_TICK || nextCp == LEX_CHAR_BACKSLASH || nextCp == LEX_CHAR_DOLLAR_SIGN) {
455 templateStr.str.Append(cp);
456 templateStr.str.Append(nextCp);
457 Iterator().Forward(1);
458 return {false, true, templateStr};
459 }
460
461 Iterator().Backward(1);
462 return {false, false, templateStr};
463 }
464 case LEX_CHAR_DOLLAR_SIGN:
465 templateStr.end = Iterator().Index();
466 Iterator().Forward(1);
467
468 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
469 Iterator().Forward(1);
470 templateStr.scanExpression = true;
471 SkipWhiteSpaces();
472 return {true, false, templateStr};
473 }
474
475 templateStr.str.Append(cp);
476 return {false, true, templateStr};
477 default:
478 return {false, false, templateStr};
479 }
480 return {false, false, templateStr};
481 }
482
ScanTemplateString()483 LexerTemplateString Lexer::ScanTemplateString()
484 {
485 LexerTemplateString templateStr(Allocator());
486 size_t cpSize = 0U;
487
488 while (true) {
489 char32_t cp = Iterator().PeekCp(&cpSize);
490
491 bool isReturn = false;
492 bool isContinue = false;
493 std::tie(isReturn, isContinue, templateStr) = ScanTemplateStringCpHelper(cp, templateStr);
494 if (isReturn) {
495 return templateStr;
496 }
497 if (isContinue) {
498 continue;
499 }
500
501 templateStr.str.Append(cp);
502 Iterator().Forward(cpSize);
503 }
504
505 UNREACHABLE();
506 return templateStr;
507 }
508
ResetTokenEnd()509 void Lexer::ResetTokenEnd()
510 {
511 SetTokenStart();
512 pos_.iterator_.Reset(GetToken().End().index);
513 pos_.line_ = GetToken().End().line;
514 pos_.nextTokenLine_ = 0;
515 }
516
ScanStringUnicodePart(util::UString * str)517 bool Lexer::ScanStringUnicodePart(util::UString *str)
518 {
519 const auto savedLine = Pos().Line();
520 const auto cp = ScanUnicodeCharacter();
521 if (Pos().Line() > savedLine) {
522 return true;
523 }
524
525 if (cp == util::StringView::Iterator::INVALID_CP) {
526 return false;
527 }
528
529 str->Append(cp);
530 return true;
531 }
532
ScanUnicodeCharacterHelper(size_t cpSize,char32_t cp)533 char32_t Lexer::ScanUnicodeCharacterHelper(size_t cpSize, char32_t cp)
534 {
535 Iterator().Forward(cpSize);
536 return cp;
537 }
538
539 // CC-OFFNXT(huge_method,G.FUN.01) big switch-case, solid logic
ScanUnicodeCharacter()540 char32_t Lexer::ScanUnicodeCharacter()
541 {
542 size_t cpSize {};
543 char32_t cp = Iterator().PeekCp(&cpSize);
544
545 switch (cp) {
546 case util::StringView::Iterator::INVALID_CP:
547 LogSyntaxError("Unterminated string");
548 break;
549 case LEX_CHAR_CR:
550 Iterator().Forward(1);
551 if (Iterator().Peek() != LEX_CHAR_LF) {
552 Iterator().Backward(1);
553 }
554
555 [[fallthrough]];
556 case LEX_CHAR_LS:
557 case LEX_CHAR_PS:
558 case LEX_CHAR_LF:
559 pos_.line_++;
560 return ScanUnicodeCharacterHelper(cpSize, util::StringView::Iterator::INVALID_CP);
561 case LEX_CHAR_LOWERCASE_B:
562 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_BS);
563 case LEX_CHAR_LOWERCASE_T:
564 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_TAB);
565 case LEX_CHAR_LOWERCASE_N:
566 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_LF);
567 case LEX_CHAR_LOWERCASE_V:
568 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_VT);
569 case LEX_CHAR_LOWERCASE_F:
570 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_FF);
571 case LEX_CHAR_LOWERCASE_R:
572 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_CR);
573 case LEX_CHAR_LOWERCASE_X:
574 Iterator().Forward(1);
575 return ScanHexEscape<2U>();
576 case LEX_CHAR_LOWERCASE_U:
577 return ScanUnicodeEscapeSequence();
578 case LEX_CHAR_0: {
579 Iterator().Forward(1);
580 bool isDecimal = IsDecimalDigit(Iterator().Peek());
581 Iterator().Backward(1);
582
583 if (!isDecimal) {
584 return ScanUnicodeCharacterHelper(cpSize, LEX_CHAR_NULL);
585 }
586
587 [[fallthrough]];
588 }
589 default:
590 if (IsDecimalDigit(Iterator().Peek())) {
591 LogSyntaxError("Invalid character escape sequence in strict mode");
592 cp = UNICODE_INVALID_CP;
593 }
594 break;
595 }
596
597 return ScanUnicodeCharacterHelper(cpSize, cp);
598 }
599
ScanQuestionPunctuator()600 void Lexer::ScanQuestionPunctuator()
601 {
602 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_MARK;
603
604 switch (Iterator().Peek()) {
605 case LEX_CHAR_QUESTION: {
606 GetToken().type_ = TokenType::PUNCTUATOR_NULLISH_COALESCING;
607 Iterator().Forward(1);
608
609 switch (Iterator().Peek()) {
610 case LEX_CHAR_EQUALS: {
611 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_NULLISH_EQUAL;
612 Iterator().Forward(1);
613 break;
614 }
615 default: {
616 break;
617 }
618 }
619
620 break;
621 }
622 case LEX_CHAR_DOT: {
623 Iterator().Forward(1);
624
625 if (!IsDecimalDigit(Iterator().Peek())) {
626 GetToken().type_ = TokenType::PUNCTUATOR_QUESTION_DOT;
627 return;
628 }
629
630 Iterator().Backward(1);
631 break;
632 }
633 default: {
634 break;
635 }
636 }
637 }
638
ScanLessThanPunctuator()639 void Lexer::ScanLessThanPunctuator()
640 {
641 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN;
642
643 switch (Iterator().Peek()) {
644 case LEX_CHAR_LESS_THAN: {
645 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT;
646 Iterator().Forward(1);
647
648 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
649 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SHIFT_EQUAL;
650 Iterator().Forward(1);
651 }
652 break;
653 }
654 case LEX_CHAR_EQUALS: {
655 GetToken().type_ = TokenType::PUNCTUATOR_LESS_THAN_EQUAL;
656 Iterator().Forward(1);
657 break;
658 }
659 default: {
660 break;
661 }
662 }
663 }
664
ScanGreaterThanPunctuator()665 void Lexer::ScanGreaterThanPunctuator()
666 {
667 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN;
668
669 switch (Iterator().Peek()) {
670 case LEX_CHAR_GREATER_THAN: {
671 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT;
672 Iterator().Forward(1);
673
674 switch (Iterator().Peek()) {
675 case LEX_CHAR_GREATER_THAN: {
676 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT;
677 Iterator().Forward(1);
678
679 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
680 GetToken().type_ = TokenType::PUNCTUATOR_UNSIGNED_RIGHT_SHIFT_EQUAL;
681 Iterator().Forward(1);
682 }
683 break;
684 }
685 case LEX_CHAR_EQUALS: {
686 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SHIFT_EQUAL;
687 Iterator().Forward(1);
688 break;
689 }
690 default: {
691 break;
692 }
693 }
694 break;
695 }
696 case LEX_CHAR_EQUALS: {
697 GetToken().type_ = TokenType::PUNCTUATOR_GREATER_THAN_EQUAL;
698 Iterator().Forward(1);
699 break;
700 }
701 default: {
702 break;
703 }
704 }
705 }
706
ScanEqualsPunctuator()707 void Lexer::ScanEqualsPunctuator()
708 {
709 GetToken().type_ = TokenType::PUNCTUATOR_SUBSTITUTION;
710
711 switch (Iterator().Peek()) {
712 case LEX_CHAR_EQUALS: {
713 GetToken().type_ = TokenType::PUNCTUATOR_EQUAL;
714 Iterator().Forward(1);
715
716 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
717 GetToken().type_ = TokenType::PUNCTUATOR_STRICT_EQUAL;
718 Iterator().Forward(1);
719 }
720 break;
721 }
722 case LEX_CHAR_GREATER_THAN: {
723 GetToken().type_ = TokenType::PUNCTUATOR_ARROW;
724 Iterator().Forward(1);
725 break;
726 }
727 default: {
728 break;
729 }
730 }
731 }
732
ScanExclamationPunctuator()733 void Lexer::ScanExclamationPunctuator()
734 {
735 GetToken().type_ = TokenType::PUNCTUATOR_EXCLAMATION_MARK;
736
737 switch (Iterator().Peek()) {
738 case LEX_CHAR_EQUALS: {
739 GetToken().type_ = TokenType::PUNCTUATOR_NOT_EQUAL;
740 Iterator().Forward(1);
741
742 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
743 GetToken().type_ = TokenType::PUNCTUATOR_NOT_STRICT_EQUAL;
744 Iterator().Forward(1);
745 }
746 break;
747 }
748 default: {
749 break;
750 }
751 }
752 }
753
ScanAmpersandPunctuator()754 void Lexer::ScanAmpersandPunctuator()
755 {
756 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND;
757
758 switch (Iterator().Peek()) {
759 case LEX_CHAR_AMPERSAND: {
760 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND;
761 Iterator().Forward(1);
762
763 switch (Iterator().Peek()) {
764 case LEX_CHAR_EQUALS: {
765 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_AND_EQUAL;
766 Iterator().Forward(1);
767 break;
768 }
769 default: {
770 break;
771 }
772 }
773
774 break;
775 }
776 case LEX_CHAR_EQUALS: {
777 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_AND_EQUAL;
778 Iterator().Forward(1);
779 break;
780 }
781 default: {
782 break;
783 }
784 }
785 }
786
ScanAtPunctuator()787 void Lexer::ScanAtPunctuator()
788 {
789 GetToken().type_ = TokenType::PUNCTUATOR_AT;
790
791 if (Iterator().Peek() == LEX_CHAR_AT) {
792 GetToken().type_ = TokenType::PUNCTUATOR_FORMAT;
793 Iterator().Forward(1U);
794 }
795 }
796
ScanVLinePunctuator()797 void Lexer::ScanVLinePunctuator()
798 {
799 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR;
800
801 switch (Iterator().Peek()) {
802 case LEX_CHAR_VLINE: {
803 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR;
804 Iterator().Forward(1);
805
806 switch (Iterator().Peek()) {
807 case LEX_CHAR_EQUALS: {
808 GetToken().type_ = TokenType::PUNCTUATOR_LOGICAL_OR_EQUAL;
809 Iterator().Forward(1);
810 break;
811 }
812 default: {
813 break;
814 }
815 }
816
817 break;
818 }
819 case LEX_CHAR_EQUALS: {
820 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_OR_EQUAL;
821 Iterator().Forward(1);
822 break;
823 }
824 default: {
825 break;
826 }
827 }
828 }
829
ScanCircumflexPunctuator()830 void Lexer::ScanCircumflexPunctuator()
831 {
832 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR;
833
834 switch (Iterator().Peek()) {
835 case LEX_CHAR_EQUALS: {
836 GetToken().type_ = TokenType::PUNCTUATOR_BITWISE_XOR_EQUAL;
837 Iterator().Forward(1);
838 break;
839 }
840 default: {
841 break;
842 }
843 }
844 }
845
ScanPlusPunctuator()846 void Lexer::ScanPlusPunctuator()
847 {
848 GetToken().type_ = TokenType::PUNCTUATOR_PLUS;
849
850 switch (Iterator().Peek()) {
851 case LEX_CHAR_PLUS: {
852 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_PLUS;
853 Iterator().Forward(1);
854 break;
855 }
856 case LEX_CHAR_EQUALS: {
857 GetToken().type_ = TokenType::PUNCTUATOR_PLUS_EQUAL;
858 Iterator().Forward(1);
859 break;
860 }
861 default: {
862 break;
863 }
864 }
865 }
866
ScanMinusPunctuator()867 void Lexer::ScanMinusPunctuator()
868 {
869 GetToken().type_ = TokenType::PUNCTUATOR_MINUS;
870
871 switch (Iterator().Peek()) {
872 case LEX_CHAR_MINUS: {
873 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_MINUS;
874 Iterator().Forward(1);
875 break;
876 }
877 case LEX_CHAR_EQUALS: {
878 GetToken().type_ = TokenType::PUNCTUATOR_MINUS_EQUAL;
879 Iterator().Forward(1);
880 break;
881 }
882 default: {
883 break;
884 }
885 }
886 }
887
ScanSlashPunctuator()888 void Lexer::ScanSlashPunctuator()
889 {
890 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE;
891
892 switch (Iterator().Peek()) {
893 case LEX_CHAR_EQUALS: {
894 GetToken().type_ = TokenType::PUNCTUATOR_DIVIDE_EQUAL;
895 Iterator().Forward(1);
896 break;
897 }
898 default: {
899 break;
900 }
901 }
902 }
903
ScanDotPunctuator()904 void Lexer::ScanDotPunctuator()
905 {
906 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD;
907
908 switch (Iterator().Peek()) {
909 case LEX_CHAR_0:
910 case LEX_CHAR_1:
911 case LEX_CHAR_2:
912 case LEX_CHAR_3:
913 case LEX_CHAR_4:
914 case LEX_CHAR_5:
915 case LEX_CHAR_6:
916 case LEX_CHAR_7:
917 case LEX_CHAR_8:
918 case LEX_CHAR_9: {
919 ScanNumber();
920 break;
921 }
922 case LEX_CHAR_QUESTION: {
923 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_QUESTION;
924 Iterator().Forward(1);
925 break;
926 }
927 case LEX_CHAR_DOT: {
928 Iterator().Forward(1);
929
930 if (Iterator().Peek() == LEX_CHAR_DOT) {
931 GetToken().type_ = TokenType::PUNCTUATOR_PERIOD_PERIOD_PERIOD;
932 Iterator().Forward(1);
933 break;
934 }
935
936 Iterator().Backward(1);
937 break;
938 }
939 default: {
940 break;
941 }
942 }
943 }
944
ScanAsteriskPunctuator()945 void Lexer::ScanAsteriskPunctuator()
946 {
947 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY;
948
949 switch (Iterator().Peek()) {
950 case LEX_CHAR_ASTERISK: {
951 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION;
952 Iterator().Forward(1);
953
954 if (Iterator().Peek() == LEX_CHAR_EQUALS) {
955 GetToken().type_ = TokenType::PUNCTUATOR_EXPONENTIATION_EQUAL;
956 Iterator().Forward(1);
957 }
958 break;
959 }
960 case LEX_CHAR_EQUALS: {
961 GetToken().type_ = TokenType::PUNCTUATOR_MULTIPLY_EQUAL;
962 Iterator().Forward(1);
963 break;
964 }
965 default: {
966 break;
967 }
968 }
969 }
970
ScanPercentPunctuator()971 void Lexer::ScanPercentPunctuator()
972 {
973 GetToken().type_ = TokenType::PUNCTUATOR_MOD;
974
975 switch (Iterator().Peek()) {
976 case LEX_CHAR_EQUALS: {
977 GetToken().type_ = TokenType::PUNCTUATOR_MOD_EQUAL;
978 Iterator().Forward(1);
979 break;
980 }
981 default: {
982 break;
983 }
984 }
985 }
986
IsLineTerminatorOrEos() const987 bool Lexer::IsLineTerminatorOrEos() const
988 {
989 switch (Iterator().PeekCp()) {
990 case util::StringView::Iterator::INVALID_CP:
991 case LEX_CHAR_LF:
992 case LEX_CHAR_CR:
993 case LEX_CHAR_LS:
994 case LEX_CHAR_PS: {
995 return true;
996 }
997 default: {
998 break;
999 }
1000 }
1001
1002 return false;
1003 }
1004
ScanRegExpPattern()1005 bool Lexer::ScanRegExpPattern()
1006 {
1007 bool isCharClass = false;
1008 size_t cpSize {};
1009
1010 while (true) {
1011 switch (Iterator().PeekCp(&cpSize)) {
1012 case util::StringView::Iterator::INVALID_CP:
1013 case LEX_CHAR_LF:
1014 case LEX_CHAR_CR:
1015 case LEX_CHAR_LS:
1016 case LEX_CHAR_PS: {
1017 LogSyntaxError("Unterminated RegExp");
1018 return false;
1019 }
1020 case LEX_CHAR_SLASH: {
1021 if (!isCharClass) {
1022 return true;
1023 }
1024
1025 break;
1026 }
1027 case LEX_CHAR_LEFT_SQUARE: {
1028 isCharClass = true;
1029 break;
1030 }
1031 case LEX_CHAR_RIGHT_SQUARE: {
1032 isCharClass = false;
1033 break;
1034 }
1035 case LEX_CHAR_BACKSLASH: {
1036 Iterator().Forward(1);
1037
1038 if (IsLineTerminatorOrEos()) {
1039 continue;
1040 }
1041
1042 break;
1043 }
1044 default: {
1045 break;
1046 }
1047 }
1048
1049 Iterator().Forward(cpSize);
1050 }
1051
1052 return true;
1053 }
1054
ScanRegExpFlags()1055 RegExpFlags Lexer::ScanRegExpFlags()
1056 {
1057 RegExpFlags resultFlags = RegExpFlags::EMPTY;
1058
1059 while (true) {
1060 size_t cpSize {};
1061 auto cp = Iterator().PeekCp(&cpSize);
1062 if (!KeywordsUtil::IsIdentifierPart(cp)) {
1063 break;
1064 }
1065
1066 Iterator().Forward(cpSize);
1067
1068 RegExpFlags flag = RegExpFlags::EMPTY;
1069
1070 switch (cp) {
1071 case LEX_CHAR_LOWERCASE_G: {
1072 flag = RegExpFlags::GLOBAL;
1073 break;
1074 }
1075 case LEX_CHAR_LOWERCASE_I: {
1076 flag = RegExpFlags::IGNORE_CASE;
1077 break;
1078 }
1079 case LEX_CHAR_LOWERCASE_M: {
1080 flag = RegExpFlags::MULTILINE;
1081 break;
1082 }
1083 case LEX_CHAR_LOWERCASE_S: {
1084 flag = RegExpFlags::DOTALL;
1085 break;
1086 }
1087 case LEX_CHAR_LOWERCASE_U: {
1088 flag = RegExpFlags::UNICODE;
1089 break;
1090 }
1091 case LEX_CHAR_LOWERCASE_Y: {
1092 flag = RegExpFlags::STICKY;
1093 break;
1094 }
1095 case LEX_CHAR_SP: {
1096 return resultFlags;
1097 }
1098 default: {
1099 LogSyntaxError("Invalid RegExp flag");
1100 return resultFlags;
1101 }
1102 }
1103
1104 if (flag == RegExpFlags::EMPTY || (resultFlags & flag) != 0) {
1105 LogSyntaxError("Invalid RegExp flag");
1106 }
1107
1108 resultFlags = resultFlags | flag;
1109 }
1110
1111 return resultFlags;
1112 }
1113
CheckOctal()1114 void Lexer::CheckOctal()
1115 {
1116 switch (Iterator().Peek()) {
1117 case LEX_CHAR_8:
1118 case LEX_CHAR_9: {
1119 LogSyntaxError("Invalid octal digit");
1120 break;
1121 }
1122 default: {
1123 break;
1124 }
1125 }
1126 }
1127
ScanRegExp()1128 RegExp Lexer::ScanRegExp()
1129 {
1130 // for proper handling such regexps as /=/
1131 if (GetToken().Type() == lexer::TokenType::PUNCTUATOR_DIVIDE_EQUAL) {
1132 Iterator().Backward(1);
1133 }
1134
1135 GetToken().type_ = TokenType::LITERAL_REGEXP;
1136 GetToken().keywordType_ = TokenType::LITERAL_REGEXP;
1137
1138 const auto patternStart = Iterator().Index();
1139 if (!ScanRegExpPattern()) {
1140 return {nullptr, nullptr, RegExpFlags::EMPTY};
1141 }
1142
1143 const auto pattern = SourceView(patternStart, Iterator().Index());
1144
1145 ASSERT(Iterator().Peek() == LEX_CHAR_SLASH);
1146 Iterator().Forward(1);
1147
1148 const auto flagsStart = Iterator().Index();
1149 RegExpFlags resultFlags = ScanRegExpFlags();
1150 const auto flags = SourceView(flagsStart, Iterator().Index());
1151
1152 SkipWhiteSpaces();
1153 SetTokenEnd();
1154
1155 return {pattern, flags, resultFlags};
1156 }
1157
CheckArrow()1158 bool Lexer::CheckArrow()
1159 {
1160 if (Iterator().Peek() != LEX_CHAR_EQUALS) {
1161 return false;
1162 }
1163 Iterator().Forward(1);
1164
1165 bool res = Iterator().Peek() == LEX_CHAR_GREATER_THAN;
1166 Iterator().Backward(1);
1167
1168 return res;
1169 }
1170
SetTokenStart()1171 void Lexer::SetTokenStart()
1172 {
1173 if (pos_.nextTokenLine_ != 0) {
1174 pos_.line_ += pos_.nextTokenLine_;
1175 pos_.nextTokenLine_ = 0;
1176 GetToken().flags_ = TokenFlags::NEW_LINE;
1177 } else {
1178 GetToken().flags_ = TokenFlags::NONE;
1179 }
1180
1181 pos_.token_.loc_.start = SourcePosition {Iterator().Index(), pos_.line_};
1182 GetToken().keywordType_ = TokenType::EOS;
1183 }
1184
SetTokenEnd()1185 void Lexer::SetTokenEnd()
1186 {
1187 pos_.token_.loc_.end = SourcePosition {Iterator().Index(), pos_.line_};
1188 }
1189
SkipWhiteSpacesHelperSlash(char32_t * cp)1190 bool Lexer::SkipWhiteSpacesHelperSlash(char32_t *cp)
1191 {
1192 Iterator().Forward(1);
1193 *cp = Iterator().Peek();
1194 if (*cp == LEX_CHAR_SLASH || *cp == LEX_CHAR_ASTERISK) {
1195 Iterator().Forward(1);
1196 *cp == LEX_CHAR_SLASH ? SkipSingleLineComment() : SkipMultiLineComment();
1197 return true;
1198 }
1199
1200 Iterator().Backward(1);
1201 return false;
1202 }
1203
SkipWhiteSpacesHelperDefault(const char32_t & cp)1204 bool Lexer::SkipWhiteSpacesHelperDefault(const char32_t &cp)
1205 {
1206 if (cp < LEX_ASCII_MAX_BITS) {
1207 return false;
1208 }
1209
1210 size_t cpSize {};
1211
1212 switch (Iterator().PeekCp(&cpSize)) {
1213 case LEX_CHAR_LS:
1214 case LEX_CHAR_PS:
1215 pos_.nextTokenLine_++;
1216 [[fallthrough]];
1217 case LEX_CHAR_NBSP:
1218 case LEX_CHAR_ZWNBSP:
1219 Iterator().Forward(cpSize);
1220 return true;
1221 default:
1222 return false;
1223 }
1224 }
1225
SkipWhiteSpaces()1226 void Lexer::SkipWhiteSpaces()
1227 {
1228 while (true) {
1229 auto cp = Iterator().Peek();
1230
1231 switch (cp) {
1232 case LEX_CHAR_CR:
1233 Iterator().Forward(1);
1234
1235 if (Iterator().Peek() != LEX_CHAR_LF) {
1236 Iterator().Backward(1);
1237 }
1238
1239 [[fallthrough]];
1240 case LEX_CHAR_LF:
1241 Iterator().Forward(1);
1242 pos_.nextTokenLine_++;
1243 continue;
1244 case LEX_CHAR_VT:
1245 case LEX_CHAR_FF:
1246 case LEX_CHAR_SP:
1247 case LEX_CHAR_TAB:
1248 Iterator().Forward(1);
1249 continue;
1250 case LEX_CHAR_SLASH:
1251 if (!SkipWhiteSpacesHelperSlash(&cp)) {
1252 return;
1253 }
1254 continue;
1255 default:
1256 if (!SkipWhiteSpacesHelperDefault(cp)) {
1257 return;
1258 }
1259 continue;
1260 }
1261 }
1262 }
1263
ScanHashMark()1264 void Lexer::ScanHashMark()
1265 {
1266 GetToken().type_ = TokenType::PUNCTUATOR_HASH_MARK;
1267 }
1268
ScanBackTick()1269 void Lexer::ScanBackTick()
1270 {
1271 GetToken().type_ = TokenType::PUNCTUATOR_BACK_TICK;
1272 SetTokenEnd();
1273 }
1274
1275 // NOLINTNEXTLINE(google-default-arguments)
NextToken(NextTokenFlags flags)1276 void Lexer::NextToken(NextTokenFlags flags)
1277 {
1278 JSKeywords kws(this, flags);
1279 NextToken(&kws);
1280 }
1281
ScanColonPunctuator()1282 void Lexer::ScanColonPunctuator()
1283 {
1284 GetToken().type_ = TokenType::PUNCTUATOR_COLON;
1285 }
1286
ScanDollarPunctuator()1287 bool Lexer::ScanDollarPunctuator()
1288 {
1289 return false;
1290 }
1291
1292 // CC-OFFNXT(huge_method,huge_cyclomatic_complexity,G.FUN.01-CPP) big switch-case, solid logic
1293 // NOLINTNEXTLINE(readability-function-size)
NextToken(Keywords * kws)1294 void Lexer::NextToken(Keywords *kws)
1295 {
1296 KeywordsUtil &kwu = kws->Util();
1297
1298 SetTokenStart();
1299
1300 auto cp = Iterator().Peek();
1301 Iterator().Forward(1);
1302
1303 switch (cp) {
1304 case LEX_CHAR_EXCLAMATION: {
1305 ScanExclamationPunctuator();
1306 break;
1307 }
1308 case LEX_CHAR_SINGLE_QUOTE: {
1309 ScanString<LEX_CHAR_SINGLE_QUOTE>();
1310 break;
1311 }
1312 case LEX_CHAR_DOUBLE_QUOTE: {
1313 ScanString<LEX_CHAR_DOUBLE_QUOTE>();
1314 break;
1315 }
1316 case LEX_CHAR_HASH_MARK: {
1317 ScanHashMark();
1318 break;
1319 }
1320 case LEX_CHAR_PERCENT: {
1321 ScanPercentPunctuator();
1322 break;
1323 }
1324 case LEX_CHAR_AMPERSAND: {
1325 ScanAmpersandPunctuator();
1326 break;
1327 }
1328 case LEX_CHAR_LEFT_PAREN: {
1329 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_PARENTHESIS;
1330 break;
1331 }
1332 case LEX_CHAR_RIGHT_PAREN: {
1333 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_PARENTHESIS;
1334 break;
1335 }
1336 case LEX_CHAR_ASTERISK: {
1337 ScanAsteriskPunctuator();
1338 break;
1339 }
1340 case LEX_CHAR_PLUS: {
1341 ScanPlusPunctuator();
1342 break;
1343 }
1344 case LEX_CHAR_COMMA: {
1345 GetToken().type_ = TokenType::PUNCTUATOR_COMMA;
1346 break;
1347 }
1348 case LEX_CHAR_MINUS: {
1349 ScanMinusPunctuator();
1350 break;
1351 }
1352 case LEX_CHAR_DOT: {
1353 ScanDotPunctuator();
1354 break;
1355 }
1356 case LEX_CHAR_SLASH: {
1357 ScanSlashPunctuator();
1358 break;
1359 }
1360 case LEX_CHAR_0: {
1361 if (Iterator().Peek() != LEX_CHAR_DOT) {
1362 ScanNumberLeadingZero((kwu.Flags() & NextTokenFlags::UNARY_MINUS) !=
1363 std::underlying_type_t<NextTokenFlags>(0U));
1364 break;
1365 }
1366 [[fallthrough]];
1367 }
1368 case LEX_CHAR_1:
1369 case LEX_CHAR_2:
1370 case LEX_CHAR_3:
1371 case LEX_CHAR_4:
1372 case LEX_CHAR_5:
1373 case LEX_CHAR_6:
1374 case LEX_CHAR_7:
1375 case LEX_CHAR_8:
1376 case LEX_CHAR_9: {
1377 ScanNumber((kwu.Flags() & NextTokenFlags::UNARY_MINUS) != std::underlying_type_t<NextTokenFlags>(0U));
1378 break;
1379 }
1380 case LEX_CHAR_COLON: {
1381 ScanColonPunctuator();
1382 break;
1383 }
1384 case LEX_CHAR_SEMICOLON: {
1385 GetToken().type_ = TokenType::PUNCTUATOR_SEMI_COLON;
1386 break;
1387 }
1388 case LEX_CHAR_LESS_THAN: {
1389 ScanLessThanPunctuator();
1390 break;
1391 }
1392 case LEX_CHAR_EQUALS: {
1393 ScanEqualsPunctuator();
1394 break;
1395 }
1396 case LEX_CHAR_GREATER_THAN: {
1397 ScanGreaterThanPunctuator();
1398 break;
1399 }
1400 case LEX_CHAR_QUESTION: {
1401 ScanQuestionPunctuator();
1402 break;
1403 }
1404 case LEX_CHAR_AT: {
1405 ScanAtPunctuator();
1406 break;
1407 }
1408 case LEX_CHAR_DOLLAR_SIGN: {
1409 if (ScanDollarPunctuator()) {
1410 break;
1411 }
1412
1413 [[fallthrough]];
1414 }
1415 case LEX_CHAR_UPPERCASE_A:
1416 case LEX_CHAR_UPPERCASE_B:
1417 case LEX_CHAR_UPPERCASE_C:
1418 case LEX_CHAR_UPPERCASE_D:
1419 case LEX_CHAR_UPPERCASE_E:
1420 case LEX_CHAR_UPPERCASE_F:
1421 case LEX_CHAR_UPPERCASE_G:
1422 case LEX_CHAR_UPPERCASE_H:
1423 case LEX_CHAR_UPPERCASE_I:
1424 case LEX_CHAR_UPPERCASE_J:
1425 case LEX_CHAR_UPPERCASE_K:
1426 case LEX_CHAR_UPPERCASE_L:
1427 case LEX_CHAR_UPPERCASE_M:
1428 case LEX_CHAR_UPPERCASE_N:
1429 case LEX_CHAR_UPPERCASE_O:
1430 case LEX_CHAR_UPPERCASE_P:
1431 case LEX_CHAR_UPPERCASE_Q:
1432 case LEX_CHAR_UPPERCASE_R:
1433 case LEX_CHAR_UPPERCASE_S:
1434 case LEX_CHAR_UPPERCASE_T:
1435 case LEX_CHAR_UPPERCASE_U:
1436 case LEX_CHAR_UPPERCASE_V:
1437 case LEX_CHAR_UPPERCASE_W:
1438 case LEX_CHAR_UPPERCASE_X:
1439 case LEX_CHAR_UPPERCASE_Y:
1440 case LEX_CHAR_UPPERCASE_Z:
1441 case LEX_CHAR_UNDERSCORE: {
1442 kwu.ScanIdContinue();
1443 break;
1444 }
1445 case LEX_CHAR_LEFT_SQUARE: {
1446 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_SQUARE_BRACKET;
1447 break;
1448 }
1449 case LEX_CHAR_BACKSLASH: {
1450 GetToken().flags_ |= TokenFlags::HAS_ESCAPE;
1451
1452 if (Iterator().Peek() != LEX_CHAR_LOWERCASE_U) {
1453 LogSyntaxError("Invalid character");
1454 break;
1455 }
1456
1457 cp = ScanUnicodeEscapeSequence();
1458 kwu.ScanIdentifierStart(kws, cp);
1459 break;
1460 }
1461 case LEX_CHAR_RIGHT_SQUARE: {
1462 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_SQUARE_BRACKET;
1463 break;
1464 }
1465 case LEX_CHAR_CIRCUMFLEX: {
1466 ScanCircumflexPunctuator();
1467 break;
1468 }
1469 case LEX_CHAR_BACK_TICK: {
1470 ScanBackTick();
1471 return;
1472 }
1473 case LEX_CHAR_LOWERCASE_C: {
1474 if (ScanCharLiteral()) {
1475 break;
1476 }
1477 }
1478 [[fallthrough]];
1479 case LEX_CHAR_LOWERCASE_A:
1480 case LEX_CHAR_LOWERCASE_B:
1481 case LEX_CHAR_LOWERCASE_D:
1482 case LEX_CHAR_LOWERCASE_E:
1483 case LEX_CHAR_LOWERCASE_F:
1484 case LEX_CHAR_LOWERCASE_G:
1485 case LEX_CHAR_LOWERCASE_H:
1486 case LEX_CHAR_LOWERCASE_I:
1487 case LEX_CHAR_LOWERCASE_J:
1488 case LEX_CHAR_LOWERCASE_K:
1489 case LEX_CHAR_LOWERCASE_L:
1490 case LEX_CHAR_LOWERCASE_M:
1491 case LEX_CHAR_LOWERCASE_N:
1492 case LEX_CHAR_LOWERCASE_O:
1493 case LEX_CHAR_LOWERCASE_P:
1494 case LEX_CHAR_LOWERCASE_Q:
1495 case LEX_CHAR_LOWERCASE_R:
1496 case LEX_CHAR_LOWERCASE_S:
1497 case LEX_CHAR_LOWERCASE_T:
1498 case LEX_CHAR_LOWERCASE_U:
1499 case LEX_CHAR_LOWERCASE_V:
1500 case LEX_CHAR_LOWERCASE_W:
1501 case LEX_CHAR_LOWERCASE_X:
1502 case LEX_CHAR_LOWERCASE_Y:
1503 case LEX_CHAR_LOWERCASE_Z: {
1504 kws->ScanKeyword(cp);
1505 break;
1506 }
1507 case LEX_CHAR_LEFT_BRACE: {
1508 GetToken().type_ = TokenType::PUNCTUATOR_LEFT_BRACE;
1509
1510 if (tlCtx_ != nullptr) {
1511 tlCtx_->ConsumeLeftBrace();
1512 }
1513
1514 break;
1515 }
1516 case LEX_CHAR_VLINE: {
1517 ScanVLinePunctuator();
1518 break;
1519 }
1520 case LEX_CHAR_RIGHT_BRACE: {
1521 GetToken().type_ = TokenType::PUNCTUATOR_RIGHT_BRACE;
1522
1523 if (tlCtx_ != nullptr && tlCtx_->ConsumeRightBrace()) {
1524 SetTokenEnd();
1525 return;
1526 }
1527
1528 break;
1529 }
1530 case LEX_CHAR_TILDE: {
1531 GetToken().type_ = TokenType::PUNCTUATOR_TILDE;
1532 break;
1533 }
1534 default: {
1535 Iterator().Backward(1);
1536 if (cp == util::StringView::Iterator::INVALID_CP) {
1537 GetToken().type_ = TokenType::EOS;
1538 break;
1539 }
1540
1541 cp = Iterator().Next();
1542 kwu.ScanIdentifierStart(kws, cp);
1543 break;
1544 }
1545 }
1546
1547 SetTokenEnd();
1548 SkipWhiteSpaces();
1549 }
1550
ScanNumberLeadingZeroImplNonAllowedCases()1551 void Lexer::ScanNumberLeadingZeroImplNonAllowedCases()
1552 {
1553 switch (Iterator().Peek()) {
1554 case LEX_CHAR_0:
1555 case LEX_CHAR_1:
1556 case LEX_CHAR_2:
1557 case LEX_CHAR_3:
1558 case LEX_CHAR_4:
1559 case LEX_CHAR_5:
1560 case LEX_CHAR_6:
1561 case LEX_CHAR_7: {
1562 LogSyntaxError("Implicit octal literal not allowed");
1563 break;
1564 }
1565 case LEX_CHAR_8:
1566 case LEX_CHAR_9: {
1567 LogSyntaxError("NonOctalDecimalIntegerLiteral is not enabled in strict mode code");
1568 break;
1569 }
1570 case LEX_CHAR_UNDERSCORE: {
1571 LogSyntaxError("Numeric separator '_' is not allowed in numbers that start with '0'.");
1572 break;
1573 }
1574 default: {
1575 break;
1576 }
1577 }
1578 }
1579
HandleNewlineHelper(util::UString * str,size_t * escapeEnd)1580 void Lexer::HandleNewlineHelper(util::UString *str, size_t *escapeEnd)
1581 {
1582 GetToken().flags_ |= TokenFlags::HAS_ESCAPE;
1583 str->Append(SourceView(*escapeEnd, Iterator().Index()));
1584
1585 if (Iterator().Peek() == LEX_CHAR_CR) {
1586 Iterator().Forward(1);
1587 if (Iterator().Peek() != LEX_CHAR_LF) {
1588 Iterator().Backward(1);
1589 }
1590 }
1591
1592 pos_.line_++;
1593 str->Append(LEX_CHAR_LF);
1594 Iterator().Forward(1);
1595 *escapeEnd = Iterator().Index();
1596 }
1597
HandleBackslashHelper(util::UString * str,size_t * escapeEnd)1598 bool Lexer::HandleBackslashHelper(util::UString *str, size_t *escapeEnd)
1599 {
1600 GetToken().flags_ |= TokenFlags::HAS_ESCAPE;
1601 str->Append(SourceView(*escapeEnd, Iterator().Index()));
1602 Iterator().Forward(1);
1603 bool scanned = ScanStringUnicodePart(str);
1604 *escapeEnd = Iterator().Index();
1605 return scanned;
1606 }
1607
HandleDollarSignHelper(const char32_t & end)1608 bool Lexer::HandleDollarSignHelper(const char32_t &end)
1609 {
1610 Iterator().Forward(1);
1611 if (end == LEX_CHAR_BACK_TICK) {
1612 if (Iterator().Peek() == LEX_CHAR_LEFT_BRACE) {
1613 Iterator().Backward(1);
1614 return true;
1615 }
1616 }
1617 return false;
1618 }
1619
HandleDoubleQuoteHelper(const char32_t & end,const char32_t & cp)1620 bool Lexer::HandleDoubleQuoteHelper(const char32_t &end, const char32_t &cp)
1621 {
1622 if (end == cp) {
1623 return false;
1624 }
1625 Iterator().Forward(1);
1626 return true;
1627 }
1628
PrepareStringTokenHelper()1629 void Lexer::PrepareStringTokenHelper()
1630 {
1631 GetToken().type_ = TokenType::LITERAL_STRING;
1632 GetToken().keywordType_ = TokenType::LITERAL_STRING;
1633 }
1634
FinalizeTokenHelper(util::UString * str,const size_t & startPos,size_t escapeEnd,bool finalize)1635 void Lexer::FinalizeTokenHelper(util::UString *str, const size_t &startPos, size_t escapeEnd, bool finalize)
1636 {
1637 if (!finalize) {
1638 return;
1639 }
1640
1641 if ((GetToken().flags_ & TokenFlags::HAS_ESCAPE) != 0U) {
1642 str->Append(SourceView(escapeEnd, Iterator().Index()));
1643 GetToken().src_ = str->View();
1644 } else {
1645 GetToken().src_ = SourceView(startPos, Iterator().Index());
1646 }
1647 }
1648
Pos()1649 LexerPosition &Lexer::Pos()
1650 {
1651 return pos_;
1652 }
1653
Pos() const1654 const LexerPosition &Lexer::Pos() const
1655 {
1656 return pos_;
1657 }
1658 } // namespace ark::es2panda::lexer
1659