1 /**
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "regexp.h"
17
18 #include <lexer/token/letters.h>
19 #include <unicode/uchar.h>
20
21 namespace panda::es2panda::lexer {
22
RegExpError(const std::string_view & m)23 RegExpError::RegExpError(const std::string_view &m) : message(m) {}
24
RegExp(util::StringView p,util::StringView f,RegExpFlags reFlags)25 RegExp::RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags) : patternStr(p), flagsStr(f), flags(reFlags)
26 {
27 }
28
RegExpParser(const RegExp & re,ArenaAllocator * allocator)29 RegExpParser::RegExpParser(const RegExp &re, ArenaAllocator *allocator)
30 : re_(re), allocator_ {allocator}, iter_(re_.patternStr), capturingGroupCount_(0)
31 {
32 }
33
Unicode() const34 bool RegExpParser::Unicode() const
35 {
36 return (re_.flags & RegExpFlags::UNICODE) != 0;
37 }
38
Peek() const39 char32_t RegExpParser::Peek() const
40 {
41 return iter_.Peek();
42 }
43
Next()44 char32_t RegExpParser::Next()
45 {
46 return iter_.Next();
47 }
48
IsDecimalDigit(char32_t cp)49 static bool IsDecimalDigit(char32_t cp)
50 {
51 return (cp >= LEX_CHAR_0 && cp <= LEX_CHAR_9);
52 }
53
IsOctalDigit(char32_t cp)54 static bool IsOctalDigit(char32_t cp)
55 {
56 return (cp >= LEX_CHAR_0 && cp <= LEX_CHAR_7);
57 }
58
IsHexDigit(char32_t cp)59 static bool IsHexDigit(char32_t cp)
60 {
61 return IsDecimalDigit(cp) || (cp >= LEX_CHAR_LOWERCASE_A && cp <= LEX_CHAR_LOWERCASE_F) ||
62 (cp >= LEX_CHAR_UPPERCASE_A && cp <= LEX_CHAR_UPPERCASE_F);
63 }
64
DigitValue(char32_t cp)65 static uint32_t DigitValue(char32_t cp)
66 {
67 return (cp - LEX_CHAR_0);
68 }
69
ThrowError(const std::string_view & message)70 static void ThrowError(const std::string_view &message)
71 {
72 throw RegExpError(message);
73 }
74
HexValue(char32_t cp)75 static uint32_t HexValue(char32_t cp)
76 {
77 if (IsDecimalDigit(cp)) {
78 return DigitValue(cp);
79 }
80
81 constexpr auto OFFSET = 10;
82
83 if (cp < LEX_CHAR_LOWERCASE_A) {
84 ASSERT(cp >= LEX_CHAR_UPPERCASE_A);
85 return cp - LEX_CHAR_UPPERCASE_A + OFFSET;
86 }
87
88 return (cp - LEX_CHAR_LOWERCASE_A + OFFSET);
89 }
90
ParsePattern()91 void RegExpParser::ParsePattern()
92 {
93 ParseDisjunction();
94
95 if (iter_.HasNext()) {
96 ThrowError("Invalid closing parenthesis");
97 }
98 ValidateNamedGroupReferences();
99 }
100
ParseDisjunction()101 void RegExpParser::ParseDisjunction()
102 {
103 while (true) {
104 ParseAlternatives();
105
106 if (Peek() != LEX_CHAR_VLINE) {
107 break;
108 }
109
110 Next();
111 };
112 }
113
ParseAlternative()114 void RegExpParser::ParseAlternative()
115 {
116 switch (Peek()) {
117 case LEX_CHAR_BACKSLASH: {
118 Next();
119 char32_t cp = Peek();
120 if (cp == LEX_CHAR_LOWERCASE_B || cp == LEX_CHAR_UPPERCASE_B) {
121 /* assertion */
122 Next();
123 return;
124 }
125
126 ParseAtomEscape();
127 break;
128 }
129 case LEX_CHAR_CIRCUMFLEX:
130 case LEX_CHAR_DOLLAR_SIGN: {
131 /* assertion */
132 Next();
133 return;
134 }
135 case LEX_CHAR_LEFT_PAREN: {
136 Next();
137
138 if (Peek() != LEX_CHAR_QUESTION) {
139 ParseCapturingGroup();
140 break;
141 }
142
143 Next(); // eat '?'
144
145 char32_t cp = Next();
146 if (cp == LEX_CHAR_COLON) {
147 ParseNonCapturingGroup();
148 break;
149 }
150
151 if (cp == LEX_CHAR_EQUALS || cp == LEX_CHAR_EXCLAMATION) {
152 ParseAssertion();
153
154 if (Unicode()) {
155 return;
156 }
157
158 break;
159 }
160
161 if (cp != LEX_CHAR_LESS_THAN) {
162 ThrowError("Invalid group");
163 }
164
165 cp = Peek();
166 if (cp == LEX_CHAR_EQUALS || cp == LEX_CHAR_EXCLAMATION) {
167 Next();
168 ParseAssertion();
169 return;
170 }
171
172 ParseNamedCapturingGroup();
173 break;
174 }
175 case LEX_CHAR_LEFT_SQUARE: {
176 Next();
177 ParseCharacterClass();
178 break;
179 }
180 case LEX_CHAR_DOT: {
181 Next();
182 break;
183 }
184 default: {
185 if (ParseBracedQuantifier()) {
186 ThrowError("Invalid quantifier, nothing to repeat");
187 }
188
189 if (!ParsePatternCharacter()) {
190 ThrowError("Invalid character");
191 }
192
193 break;
194 }
195 }
196
197 ParseQuantifier();
198 }
199
ParseAlternatives()200 void RegExpParser::ParseAlternatives()
201 {
202 while (true) {
203 switch (Peek()) {
204 case util::StringView::Iterator::INVALID_CP:
205 case LEX_CHAR_RIGHT_PAREN:
206 case LEX_CHAR_VLINE: {
207 return;
208 }
209 default: {
210 ParseAlternative();
211 }
212 }
213 }
214 }
215
ParseNonCapturingGroup()216 void RegExpParser::ParseNonCapturingGroup()
217 {
218 ParseDisjunction();
219
220 if (Peek() != LEX_CHAR_RIGHT_PAREN) {
221 ThrowError("Invalid non-capturing group");
222 }
223
224 Next();
225 }
226
ParseNamedCapturingGroup()227 void RegExpParser::ParseNamedCapturingGroup()
228 {
229 util::StringView name = ParseIdent();
230
231 auto result = groupNames_.insert(name);
232 if (!result.second) {
233 ThrowError("Duplicate group name");
234 }
235
236 ParseCapturingGroup();
237 }
238
ParseCapturingGroup()239 void RegExpParser::ParseCapturingGroup()
240 {
241 capturingGroupCount_++;
242
243 ParseDisjunction();
244
245 if (Peek() != LEX_CHAR_RIGHT_PAREN) {
246 ThrowError("Invalid capturing group");
247 }
248
249 Next();
250 }
251
ParseAssertion()252 void RegExpParser::ParseAssertion()
253 {
254 ParseDisjunction();
255
256 if (Peek() != LEX_CHAR_RIGHT_PAREN) {
257 ThrowError("Invalid assertion");
258 }
259
260 Next();
261 }
262
ParseControlEscape()263 uint32_t RegExpParser::ParseControlEscape()
264 {
265 char32_t cp = Peek();
266 if ((cp < LEX_CHAR_LOWERCASE_A || cp > LEX_CHAR_LOWERCASE_Z) &&
267 (cp < LEX_CHAR_UPPERCASE_A || cp > LEX_CHAR_UPPERCASE_Z)) {
268 if (Unicode()) {
269 ThrowError("Invalid control escape");
270 }
271
272 if (cp < LEX_CHAR_0 || cp > LEX_CHAR_9) {
273 return LEX_CHAR_LOWERCASE_C;
274 }
275 }
276
277 Next();
278 constexpr auto MODULO = 32;
279 return cp % MODULO;
280 }
281
ParseClassAtom()282 char32_t RegExpParser::ParseClassAtom()
283 {
284 char32_t cp = Next();
285 if (cp != LEX_CHAR_BACKSLASH) {
286 return cp;
287 }
288
289 cp = Peek();
290 if (cp == LEX_CHAR_0) {
291 if (!Unicode()) {
292 return ParseDecimalEscape();
293 }
294
295 Next();
296
297 if (IsDecimalDigit(Peek())) {
298 ThrowError("Invalid class escape");
299 }
300
301 return LEX_CHAR_NULL;
302 }
303
304 Next();
305
306 switch (cp) {
307 case LEX_CHAR_LOWERCASE_C: {
308 return ParseControlEscape();
309 }
310 case LEX_CHAR_LOWERCASE_X: {
311 return ParseHexEscape();
312 }
313 case LEX_CHAR_LOWERCASE_U: {
314 if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
315 return cp;
316 }
317
318 return ParseUnicodeEscape();
319 }
320 case LEX_CHAR_LOWERCASE_P:
321 case LEX_CHAR_UPPERCASE_P: {
322 if (!Unicode()) {
323 return cp;
324 }
325
326 ParseUnicodePropertyEscape();
327 [[fallthrough]];
328 }
329 case LEX_CHAR_LOWERCASE_D:
330 case LEX_CHAR_UPPERCASE_D:
331 case LEX_CHAR_LOWERCASE_S:
332 case LEX_CHAR_UPPERCASE_S:
333 case LEX_CHAR_LOWERCASE_W:
334 case LEX_CHAR_UPPERCASE_W: {
335 return std::numeric_limits<uint32_t>::max();
336 }
337 case LEX_CHAR_LOWERCASE_B: {
338 return LEX_CHAR_BS;
339 }
340 case LEX_CHAR_LOWERCASE_F: {
341 return LEX_CHAR_FF;
342 }
343 case LEX_CHAR_LOWERCASE_N: {
344 return LEX_CHAR_LF;
345 }
346 case LEX_CHAR_LOWERCASE_R: {
347 return LEX_CHAR_CR;
348 }
349 case LEX_CHAR_LOWERCASE_T: {
350 return LEX_CHAR_TAB;
351 }
352 case LEX_CHAR_LOWERCASE_V: {
353 return LEX_CHAR_VT;
354 }
355 case LEX_CHAR_MINUS: {
356 return cp;
357 }
358 default: {
359 if (Unicode() && !IsSyntaxCharacter(cp) && cp != LEX_CHAR_SLASH) {
360 ThrowError("Invalid escape");
361 }
362
363 return cp;
364 }
365 }
366
367 return cp;
368 }
369
// True when `cp` is the marker value (maximum uint32_t) that ParseClassAtom() returns
// for escapes denoting a set of characters rather than a single one.
static bool IsClassEscape(uint32_t cp)
{
    constexpr uint32_t CLASS_ESCAPE_MARKER = std::numeric_limits<uint32_t>::max();
    return cp == CLASS_ESCAPE_MARKER;
}
374
ParseCharacterClass()375 void RegExpParser::ParseCharacterClass()
376 {
377 if (Peek() == LEX_CHAR_CIRCUMFLEX) {
378 Next();
379 }
380
381 while (true) {
382 if (Peek() == LEX_CHAR_RIGHT_SQUARE) {
383 Next();
384 break;
385 }
386
387 uint32_t left = ParseClassAtom();
388
389 if (Peek() != LEX_CHAR_MINUS) {
390 continue;
391 }
392
393 Next();
394
395 if (Peek() == LEX_CHAR_RIGHT_SQUARE) {
396 Next();
397 break;
398 }
399
400 uint32_t right = ParseClassAtom();
401 if ((IsClassEscape(left) || IsClassEscape(right))) {
402 if (Unicode()) {
403 ThrowError("Invalid character class");
404 }
405
406 continue;
407 }
408
409 if (left > right) {
410 ThrowError("Class range out of order");
411 }
412 }
413 }
414
IsSyntaxCharacter(char32_t cp) const415 bool RegExpParser::IsSyntaxCharacter(char32_t cp) const
416 {
417 switch (cp) {
418 case LEX_CHAR_RIGHT_SQUARE:
419 case LEX_CHAR_LEFT_BRACE:
420 case LEX_CHAR_RIGHT_BRACE: {
421 if (!Unicode()) {
422 return false;
423 }
424
425 [[fallthrough]];
426 }
427 case LEX_CHAR_CIRCUMFLEX:
428 case LEX_CHAR_DOLLAR_SIGN:
429 case LEX_CHAR_BACKSLASH:
430 case LEX_CHAR_DOT:
431 case LEX_CHAR_ASTERISK:
432 case LEX_CHAR_PLUS:
433 case LEX_CHAR_QUESTION:
434 case LEX_CHAR_LEFT_PAREN:
435 case LEX_CHAR_RIGHT_PAREN:
436 case LEX_CHAR_LEFT_SQUARE:
437 case LEX_CHAR_VLINE: {
438 return true;
439 }
440 default: {
441 return false;
442 }
443 }
444 }
445
ParseAtomEscape()446 void RegExpParser::ParseAtomEscape()
447 {
448 char32_t cp = Peek();
449 if (IsDecimalDigit(cp)) {
450 ParseDecimalEscape();
451 return;
452 }
453
454 Next();
455
456 switch (cp) {
457 case LEX_CHAR_LOWERCASE_X: {
458 if (Unicode()) {
459 ParseHexEscape();
460 }
461 break;
462 }
463 case LEX_CHAR_LOWERCASE_U: {
464 if (Unicode()) {
465 ParseUnicodeEscape();
466 }
467 break;
468 }
469 case LEX_CHAR_LOWERCASE_K: {
470 ParseNamedBackreference();
471 break;
472 }
473 /* ControlEscape */
474 case LEX_CHAR_LOWERCASE_F:
475 case LEX_CHAR_LOWERCASE_N:
476 case LEX_CHAR_LOWERCASE_R:
477 case LEX_CHAR_LOWERCASE_T:
478 case LEX_CHAR_LOWERCASE_V:
479 /* CharacterClassEscape */
480 case LEX_CHAR_LOWERCASE_D:
481 case LEX_CHAR_UPPERCASE_D:
482 case LEX_CHAR_LOWERCASE_S:
483 case LEX_CHAR_UPPERCASE_S:
484 case LEX_CHAR_LOWERCASE_W:
485 case LEX_CHAR_UPPERCASE_W: {
486 break;
487 }
488 case LEX_CHAR_LOWERCASE_P:
489 case LEX_CHAR_UPPERCASE_P: {
490 ParseUnicodePropertyEscape();
491 break;
492 }
493 case LEX_CHAR_LOWERCASE_C: {
494 cp = Peek();
495 if ((cp < LEX_CHAR_LOWERCASE_A || cp > LEX_CHAR_LOWERCASE_Z) &&
496 (cp < LEX_CHAR_UPPERCASE_A || cp > LEX_CHAR_UPPERCASE_Z)) {
497 ThrowError("Invalid control escape");
498 }
499
500 Next();
501 break;
502 }
503 default: {
504 /* IdentityEscape */
505 if (Unicode() && !IsSyntaxCharacter(cp) && cp != LEX_CHAR_SLASH) {
506 ThrowError("Invalid escape");
507 }
508 }
509 }
510 }
511
ParseDecimalEscape()512 uint32_t RegExpParser::ParseDecimalEscape()
513 {
514 ASSERT(IsDecimalDigit(Peek()));
515
516 auto digitStart = iter_;
517 uint32_t decimalValue = DigitValue(Next());
518 if (decimalValue == 0) {
519 if (!IsDecimalDigit(Peek())) {
520 /* \0 */
521 return decimalValue;
522 }
523
524 if (Unicode()) {
525 ThrowError("Invalid decimal escape");
526 }
527
528 iter_ = digitStart;
529 return ParseLegacyOctalEscape();
530 }
531
532 constexpr auto MULTIPLIER = 10;
533
534 while (IsDecimalDigit(Peek())) {
535 uint32_t newValue = decimalValue * MULTIPLIER + DigitValue(Next());
536 if (newValue < decimalValue) {
537 ThrowError("Invalid decimal escape");
538 }
539
540 decimalValue = newValue;
541 }
542
543 if (decimalValue <= capturingGroupCount_) {
544 return decimalValue;
545 }
546
547 if (Unicode()) {
548 ThrowError("Invalid decimal escape");
549 }
550
551 iter_ = digitStart;
552
553 if (!IsOctalDigit(Peek())) {
554 /* \8 or \9 */
555 return DigitValue(Next());
556 }
557
558 return ParseLegacyOctalEscape();
559 }
560
ParseLegacyOctalEscape()561 uint32_t RegExpParser::ParseLegacyOctalEscape()
562 {
563 ASSERT(IsOctalDigit(Peek()));
564 uint32_t octalValue = DigitValue(Next());
565
566 if (!IsOctalDigit(Peek())) {
567 return octalValue;
568 }
569
570 octalValue = octalValue * 8 + DigitValue(Next());
571
572 if (!IsOctalDigit(Peek())) {
573 return octalValue;
574 }
575
576 // 8 is to left shift octalValue by three bits
577 uint32_t newValue = octalValue * 8 + DigitValue(Peek());
578 constexpr uint32_t MAX_OCTAL_VALUE = 0xFF;
579
580 if (newValue <= MAX_OCTAL_VALUE) {
581 octalValue = newValue;
582 Next();
583 }
584
585 return octalValue;
586 }
587
ParseHexEscape()588 uint32_t RegExpParser::ParseHexEscape()
589 {
590 // two hexadecimal digits after x in the regular expression
591 char32_t digit = Next();
592 if (!IsHexDigit(digit)) {
593 ThrowError("Invalid hex escape");
594 }
595
596 constexpr auto MULTIPLIER = 16;
597 uint32_t cpValue = HexValue(digit) * MULTIPLIER;
598
599 digit = Next();
600 if (!IsHexDigit(digit)) {
601 ThrowError("Invalid hex escape");
602 }
603
604 cpValue += HexValue(digit);
605 return cpValue;
606 }
607
ParseUnicodeDigits()608 uint32_t RegExpParser::ParseUnicodeDigits()
609 {
610 uint32_t value = 0;
611 uint32_t count = 4;
612
613 while (count--) {
614 char32_t digit = Next();
615 if (!IsHexDigit(digit)) {
616 ThrowError("Invalid Unicode escape");
617 }
618
619 constexpr auto MULTIPLIER = 16;
620 value = value * MULTIPLIER + HexValue(digit);
621 }
622
623 return value;
624 }
625
ParseUnicodeEscape()626 uint32_t RegExpParser::ParseUnicodeEscape()
627 {
628 uint32_t value = 0;
629
630 if (Peek() == LEX_CHAR_LEFT_BRACE) {
631 Next();
632 if (!IsHexDigit(Peek())) {
633 ThrowError("Invalid Unicode escape");
634 }
635
636 while (IsHexDigit(Peek())) {
637 constexpr auto MULTIPLIER = 16;
638 value = value * MULTIPLIER + HexValue(Next());
639 constexpr uint32_t CODE_POINT_MAX = 0x10FFFF;
640
641 if (value > CODE_POINT_MAX) {
642 ThrowError("Invalid Unicode escape");
643 }
644 }
645
646 if (Peek() != LEX_CHAR_RIGHT_BRACE) {
647 ThrowError("Invalid Unicode escape");
648 }
649
650 Next();
651 } else {
652 value = ParseUnicodeDigits();
653 if (Unicode() && util::StringView::IsHighSurrogate(value)) {
654 auto pos = iter_;
655
656 if (Next() == LEX_CHAR_BACKSLASH && Next() == LEX_CHAR_LOWERCASE_U) {
657 uint32_t next = ParseUnicodeDigits();
658 if (util::StringView::IsLowSurrogate(next)) {
659 return util::StringView::DecodeSurrogates(value, next);
660 }
661 }
662
663 iter_ = pos;
664 }
665 }
666
667 return value;
668 }
669
ParseUnicodePropertyEscape()670 void RegExpParser::ParseUnicodePropertyEscape()
671 {
672 if (!Unicode()) {
673 return;
674 }
675
676 if (Peek() != LEX_CHAR_LEFT_BRACE) {
677 ThrowError("Invalid Unicode property escape");
678 }
679
680 Next();
681
682 while (true) {
683 if (!iter_.HasNext()) {
684 ThrowError("Unterminated Unicode property escape");
685 }
686
687 char32_t ch = Next();
688 if (ch == LEX_CHAR_RIGHT_BRACE) {
689 break;
690 }
691
692 /* TODO(dbatyai): Parse and valide Unicode property names */
693 }
694 }
695
ParseNamedBackreference()696 void RegExpParser::ParseNamedBackreference()
697 {
698 if (groupNames_.empty()) {
699 /* Identity escape */
700 return;
701 }
702
703 if (Next() != LEX_CHAR_LESS_THAN) {
704 ThrowError("Invalid named backreference");
705 }
706
707 util::StringView name = ParseIdent();
708 namedGroupReferences_.insert(name);
709 }
710
ValidateNamedGroupReferences()711 void RegExpParser::ValidateNamedGroupReferences()
712 {
713 for (auto& ref : namedGroupReferences_) {
714 auto result = groupNames_.find(ref);
715 if (result == groupNames_.end()) {
716 ThrowError("Invalid named capture referenced");
717 }
718 }
719 }
720
ParseQuantifier()721 void RegExpParser::ParseQuantifier()
722 {
723 switch (Peek()) {
724 case LEX_CHAR_ASTERISK:
725 case LEX_CHAR_PLUS:
726 case LEX_CHAR_QUESTION: {
727 Next();
728 break;
729 }
730 case LEX_CHAR_LEFT_BRACE: {
731 if (!ParseBracedQuantifier()) {
732 return;
733 }
734
735 break;
736 }
737 default: {
738 return;
739 }
740 }
741
742 if (Peek() == LEX_CHAR_QUESTION) {
743 Next();
744 }
745 }
746
ParseBracedQuantifier()747 bool RegExpParser::ParseBracedQuantifier()
748 {
749 if (Peek() != LEX_CHAR_LEFT_BRACE) {
750 return false;
751 }
752
753 auto startPos = iter_;
754 Next();
755
756 if (!IsDecimalDigit(Peek())) {
757 iter_ = startPos;
758 return false;
759 }
760
761 uint32_t leftValue = 0;
762 constexpr auto MULTIPLIER = 10;
763
764 while (IsDecimalDigit(Peek())) {
765 uint32_t newValue = leftValue * MULTIPLIER + DigitValue(Next());
766 if (newValue < leftValue) {
767 leftValue = std::numeric_limits<uint32_t>::max();
768 continue;
769 }
770
771 leftValue = newValue;
772 }
773
774 if (Peek() == LEX_CHAR_COMMA) {
775 Next();
776 }
777
778 if (Peek() == LEX_CHAR_RIGHT_BRACE) {
779 Next();
780 return true;
781 }
782
783 if (IsDecimalDigit(Peek())) {
784 uint32_t rightValue = 0;
785 while (IsDecimalDigit(Peek())) {
786 uint32_t newValue = rightValue * MULTIPLIER + DigitValue(Next());
787 if (newValue < rightValue) {
788 rightValue = std::numeric_limits<uint32_t>::max();
789 continue;
790 }
791
792 rightValue = newValue;
793 }
794
795 if (Peek() == LEX_CHAR_RIGHT_BRACE) {
796 if (rightValue < leftValue) {
797 ThrowError("Quantifier range out of order");
798 }
799
800 Next();
801 return true;
802 }
803 }
804
805 iter_ = startPos;
806 return false;
807 }
808
ParsePatternCharacter()809 bool RegExpParser::ParsePatternCharacter()
810 {
811 char32_t cp = Peek();
812 if (IsSyntaxCharacter(cp)) {
813 return false;
814 }
815
816 Next();
817 return true;
818 }
819
IsIdStart(uint32_t cp)820 static bool IsIdStart(uint32_t cp)
821 {
822 auto uchar = static_cast<UChar32>(cp);
823 return u_isIDStart(uchar) != 0 || cp == LEX_CHAR_DOLLAR_SIGN || cp == LEX_CHAR_UNDERSCORE;
824 }
825
IsIdCont(uint32_t cp)826 static bool IsIdCont(uint32_t cp)
827 {
828 auto uchar = static_cast<UChar32>(cp);
829 return u_isIDPart(uchar) != 0 || cp == LEX_CHAR_DOLLAR_SIGN || cp == LEX_CHAR_ZWNJ || cp == LEX_CHAR_ZWJ;
830 }
831
ParseIdent()832 util::StringView RegExpParser::ParseIdent()
833 {
834 char32_t cp = Next();
835 if (cp == LEX_CHAR_BACKSLASH) {
836 if (Next() != LEX_CHAR_LOWERCASE_U) {
837 ThrowError("Invalid group name");
838 }
839
840 if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
841 ThrowError("Invalid Unicode escape");
842 }
843
844 cp = ParseUnicodeEscape();
845 }
846
847 if (!IsIdStart(cp)) {
848 ThrowError("Invalid group name");
849 }
850
851 util::UString ident(allocator_);
852 ident.Append(cp);
853
854 while (true) {
855 cp = Next();
856 if (cp == LEX_CHAR_GREATER_THAN) {
857 break;
858 }
859
860 if (cp == LEX_CHAR_BACKSLASH) {
861 if (Next() != LEX_CHAR_LOWERCASE_U) {
862 ThrowError("Invalid group name");
863 }
864
865 if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
866 ThrowError("Invalid Unicode escape");
867 }
868
869 cp = ParseUnicodeEscape();
870 }
871
872 if (!IsIdCont(cp)) {
873 ThrowError("Invalid group name");
874 }
875
876 ident.Append(cp);
877 }
878
879 return ident.View();
880 }
881
882 } // namespace panda::es2panda::lexer
883