1 /**
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "regexp.h"
17
18 #include <lexer/token/letters.h>
19 #include <unicode/uchar.h>
20
21 #include <iostream>
22
23 namespace panda::es2panda::lexer {
24
RegExpError(const std::string_view & m)25 RegExpError::RegExpError(const std::string_view &m) : message(m) {}
26
RegExp(util::StringView p,util::StringView f,RegExpFlags reFlags)27 RegExp::RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags) : patternStr(p), flagsStr(f), flags(reFlags)
28 {
29 }
30
RegExpParser(const RegExp & re,ArenaAllocator * allocator)31 RegExpParser::RegExpParser(const RegExp &re, ArenaAllocator *allocator)
32 : re_(re), allocator_ {allocator}, iter_(re_.patternStr), capturingGroupCount_(0)
33 {
34 }
35
Unicode() const36 bool RegExpParser::Unicode() const
37 {
38 return (re_.flags & RegExpFlags::UNICODE) != 0;
39 }
40
Peek() const41 char32_t RegExpParser::Peek() const
42 {
43 return iter_.Peek();
44 }
45
Next()46 char32_t RegExpParser::Next()
47 {
48 return iter_.Next();
49 }
50
IsDecimalDigit(char32_t cp)51 static bool IsDecimalDigit(char32_t cp)
52 {
53 return (cp >= LEX_CHAR_0 && cp <= LEX_CHAR_9);
54 }
55
IsOctalDigit(char32_t cp)56 static bool IsOctalDigit(char32_t cp)
57 {
58 return (cp >= LEX_CHAR_0 && cp <= LEX_CHAR_7);
59 }
60
IsHexDigit(char32_t cp)61 static bool IsHexDigit(char32_t cp)
62 {
63 return IsDecimalDigit(cp) || (cp >= LEX_CHAR_LOWERCASE_A && cp <= LEX_CHAR_LOWERCASE_F) ||
64 (cp >= LEX_CHAR_UPPERCASE_A && cp <= LEX_CHAR_UPPERCASE_F);
65 }
66
DigitValue(char32_t cp)67 static uint32_t DigitValue(char32_t cp)
68 {
69 return (cp - LEX_CHAR_0);
70 }
71
ThrowError(const std::string_view & message)72 static void ThrowError(const std::string_view &message)
73 {
74 throw RegExpError(message);
75 }
76
HexValue(char32_t cp)77 static uint32_t HexValue(char32_t cp)
78 {
79 if (IsDecimalDigit(cp)) {
80 return DigitValue(cp);
81 }
82
83 constexpr auto OFFSET = 10;
84
85 if (cp < LEX_CHAR_LOWERCASE_A) {
86 ASSERT(cp >= LEX_CHAR_UPPERCASE_A);
87 return cp - LEX_CHAR_UPPERCASE_A + OFFSET;
88 }
89
90 return (cp - LEX_CHAR_LOWERCASE_A + OFFSET);
91 }
92
ParsePattern()93 void RegExpParser::ParsePattern()
94 {
95 ParseDisjunction();
96
97 if (iter_.HasNext()) {
98 ThrowError("Invalid closing parenthesis");
99 }
100 ValidateNamedGroupReferences();
101 }
102
ParseDisjunction()103 void RegExpParser::ParseDisjunction()
104 {
105 while (true) {
106 ParseAlternatives();
107
108 if (Peek() != LEX_CHAR_VLINE) {
109 break;
110 }
111
112 Next();
113 };
114 }
115
ParseAlternative()116 void RegExpParser::ParseAlternative()
117 {
118 switch (Peek()) {
119 case LEX_CHAR_BACKSLASH: {
120 Next();
121 char32_t cp = Peek();
122 if (cp == LEX_CHAR_LOWERCASE_B || cp == LEX_CHAR_UPPERCASE_B) {
123 /* assertion */
124 Next();
125 return;
126 }
127
128 ParseAtomEscape();
129 break;
130 }
131 case LEX_CHAR_CIRCUMFLEX:
132 case LEX_CHAR_DOLLAR_SIGN: {
133 /* assertion */
134 Next();
135 return;
136 }
137 case LEX_CHAR_LEFT_PAREN: {
138 Next();
139
140 if (Peek() != LEX_CHAR_QUESTION) {
141 ParseCapturingGroup();
142 break;
143 }
144
145 Next(); // eat '?'
146
147 char32_t cp = Next();
148 if (cp == LEX_CHAR_COLON) {
149 ParseNonCapturingGroup();
150 break;
151 }
152
153 if (cp == LEX_CHAR_EQUALS || cp == LEX_CHAR_EXCLAMATION) {
154 ParseAssertion();
155
156 if (Unicode()) {
157 return;
158 }
159
160 break;
161 }
162
163 if (cp != LEX_CHAR_LESS_THAN) {
164 ThrowError("Invalid group");
165 }
166
167 cp = Peek();
168 if (cp == LEX_CHAR_EQUALS || cp == LEX_CHAR_EXCLAMATION) {
169 Next();
170 ParseAssertion();
171 return;
172 }
173
174 ParseNamedCapturingGroup();
175 break;
176 }
177 case LEX_CHAR_LEFT_SQUARE: {
178 Next();
179 ParseCharacterClass();
180 break;
181 }
182 case LEX_CHAR_DOT: {
183 Next();
184 break;
185 }
186 default: {
187 if (ParseBracedQuantifier()) {
188 ThrowError("Invalid quantifier, nothing to repeat");
189 }
190
191 if (!ParsePatternCharacter()) {
192 ThrowError("Invalid character");
193 }
194
195 break;
196 }
197 }
198
199 ParseQuantifier();
200 }
201
ParseAlternatives()202 void RegExpParser::ParseAlternatives()
203 {
204 while (true) {
205 switch (Peek()) {
206 case util::StringView::Iterator::INVALID_CP:
207 case LEX_CHAR_RIGHT_PAREN:
208 case LEX_CHAR_VLINE: {
209 return;
210 }
211 default: {
212 ParseAlternative();
213 }
214 }
215 }
216 }
217
ParseNonCapturingGroup()218 void RegExpParser::ParseNonCapturingGroup()
219 {
220 ParseDisjunction();
221
222 if (Peek() != LEX_CHAR_RIGHT_PAREN) {
223 ThrowError("Invalid non-capturing group");
224 }
225
226 Next();
227 }
228
ParseNamedCapturingGroup()229 void RegExpParser::ParseNamedCapturingGroup()
230 {
231 util::StringView name = ParseIdent();
232
233 auto result = groupNames_.insert(name);
234 if (!result.second) {
235 ThrowError("Duplicate group name");
236 }
237
238 ParseCapturingGroup();
239 }
240
ParseCapturingGroup()241 void RegExpParser::ParseCapturingGroup()
242 {
243 capturingGroupCount_++;
244
245 ParseDisjunction();
246
247 if (Peek() != LEX_CHAR_RIGHT_PAREN) {
248 ThrowError("Invalid capturing group");
249 }
250
251 Next();
252 }
253
ParseAssertion()254 void RegExpParser::ParseAssertion()
255 {
256 ParseDisjunction();
257
258 if (Peek() != LEX_CHAR_RIGHT_PAREN) {
259 ThrowError("Invalid assertion");
260 }
261
262 Next();
263 }
264
ParseControlEscape()265 uint32_t RegExpParser::ParseControlEscape()
266 {
267 char32_t cp = Peek();
268 if ((cp < LEX_CHAR_LOWERCASE_A || cp > LEX_CHAR_LOWERCASE_Z) &&
269 (cp < LEX_CHAR_UPPERCASE_A || cp > LEX_CHAR_UPPERCASE_Z)) {
270 if (Unicode()) {
271 ThrowError("Invalid control escape");
272 }
273
274 if (cp < LEX_CHAR_0 || cp > LEX_CHAR_9) {
275 return LEX_CHAR_LOWERCASE_C;
276 }
277 }
278
279 Next();
280 constexpr auto MODULO = 32;
281 return cp % MODULO;
282 }
283
ParseClassAtom()284 char32_t RegExpParser::ParseClassAtom()
285 {
286 char32_t cp = Next();
287 if (cp != LEX_CHAR_BACKSLASH) {
288 return cp;
289 }
290
291 cp = Peek();
292 if (cp == LEX_CHAR_0) {
293 if (!Unicode()) {
294 return ParseDecimalEscape();
295 }
296
297 Next();
298
299 if (IsDecimalDigit(Peek())) {
300 ThrowError("Invalid class escape");
301 }
302
303 return LEX_CHAR_NULL;
304 }
305
306 Next();
307
308 switch (cp) {
309 case LEX_CHAR_LOWERCASE_C: {
310 return ParseControlEscape();
311 }
312 case LEX_CHAR_LOWERCASE_X: {
313 return ParseHexEscape();
314 }
315 case LEX_CHAR_LOWERCASE_U: {
316 if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
317 return cp;
318 }
319
320 return ParseUnicodeEscape();
321 }
322 case LEX_CHAR_LOWERCASE_P:
323 case LEX_CHAR_UPPERCASE_P: {
324 if (!Unicode()) {
325 return cp;
326 }
327
328 ParseUnicodePropertyEscape();
329 [[fallthrough]];
330 }
331 case LEX_CHAR_LOWERCASE_D:
332 case LEX_CHAR_UPPERCASE_D:
333 case LEX_CHAR_LOWERCASE_S:
334 case LEX_CHAR_UPPERCASE_S:
335 case LEX_CHAR_LOWERCASE_W:
336 case LEX_CHAR_UPPERCASE_W: {
337 return std::numeric_limits<uint32_t>::max();
338 }
339 case LEX_CHAR_LOWERCASE_B: {
340 return LEX_CHAR_BS;
341 }
342 case LEX_CHAR_LOWERCASE_F: {
343 return LEX_CHAR_FF;
344 }
345 case LEX_CHAR_LOWERCASE_N: {
346 return LEX_CHAR_LF;
347 }
348 case LEX_CHAR_LOWERCASE_R: {
349 return LEX_CHAR_CR;
350 }
351 case LEX_CHAR_LOWERCASE_T: {
352 return LEX_CHAR_TAB;
353 }
354 case LEX_CHAR_LOWERCASE_V: {
355 return LEX_CHAR_VT;
356 }
357 case LEX_CHAR_MINUS: {
358 return cp;
359 }
360 default: {
361 if (Unicode() && !IsSyntaxCharacter(cp) && cp != LEX_CHAR_SLASH) {
362 ThrowError("Invalid escape");
363 }
364
365 return cp;
366 }
367 }
368
369 return cp;
370 }
371
IsClassEscape(uint32_t cp)372 static bool IsClassEscape(uint32_t cp)
373 {
374 return cp == std::numeric_limits<uint32_t>::max();
375 }
376
ParseCharacterClass()377 void RegExpParser::ParseCharacterClass()
378 {
379 if (Peek() == LEX_CHAR_CIRCUMFLEX) {
380 Next();
381 }
382
383 while (true) {
384 if (Peek() == LEX_CHAR_RIGHT_SQUARE) {
385 Next();
386 break;
387 }
388
389 uint32_t left = ParseClassAtom();
390
391 if (Peek() != LEX_CHAR_MINUS) {
392 continue;
393 }
394
395 Next();
396
397 if (Peek() == LEX_CHAR_RIGHT_SQUARE) {
398 Next();
399 break;
400 }
401
402 uint32_t right = ParseClassAtom();
403 if ((IsClassEscape(left) || IsClassEscape(right))) {
404 if (Unicode()) {
405 ThrowError("Invalid character class");
406 }
407
408 continue;
409 }
410
411 if (left > right) {
412 ThrowError("Class range out of order");
413 }
414 }
415 }
416
IsSyntaxCharacter(char32_t cp) const417 bool RegExpParser::IsSyntaxCharacter(char32_t cp) const
418 {
419 switch (cp) {
420 case LEX_CHAR_RIGHT_SQUARE:
421 case LEX_CHAR_LEFT_BRACE:
422 case LEX_CHAR_RIGHT_BRACE: {
423 if (!Unicode()) {
424 return false;
425 }
426
427 [[fallthrough]];
428 }
429 case LEX_CHAR_CIRCUMFLEX:
430 case LEX_CHAR_DOLLAR_SIGN:
431 case LEX_CHAR_BACKSLASH:
432 case LEX_CHAR_DOT:
433 case LEX_CHAR_ASTERISK:
434 case LEX_CHAR_PLUS:
435 case LEX_CHAR_QUESTION:
436 case LEX_CHAR_LEFT_PAREN:
437 case LEX_CHAR_RIGHT_PAREN:
438 case LEX_CHAR_LEFT_SQUARE:
439 case LEX_CHAR_VLINE: {
440 return true;
441 }
442 default: {
443 return false;
444 }
445 }
446 }
447
ParseAtomEscape()448 void RegExpParser::ParseAtomEscape()
449 {
450 char32_t cp = Peek();
451 if (IsDecimalDigit(cp)) {
452 ParseDecimalEscape();
453 return;
454 }
455
456 Next();
457
458 switch (cp) {
459 case LEX_CHAR_LOWERCASE_X: {
460 if (Unicode()) {
461 ParseHexEscape();
462 }
463 break;
464 }
465 case LEX_CHAR_LOWERCASE_U: {
466 if (Unicode()) {
467 ParseUnicodeEscape();
468 }
469 break;
470 }
471 case LEX_CHAR_LOWERCASE_K: {
472 ParseNamedBackreference();
473 break;
474 }
475 /* ControlEscape */
476 case LEX_CHAR_LOWERCASE_F:
477 case LEX_CHAR_LOWERCASE_N:
478 case LEX_CHAR_LOWERCASE_R:
479 case LEX_CHAR_LOWERCASE_T:
480 case LEX_CHAR_LOWERCASE_V:
481 /* CharacterClassEscape */
482 case LEX_CHAR_LOWERCASE_D:
483 case LEX_CHAR_UPPERCASE_D:
484 case LEX_CHAR_LOWERCASE_S:
485 case LEX_CHAR_UPPERCASE_S:
486 case LEX_CHAR_LOWERCASE_W:
487 case LEX_CHAR_UPPERCASE_W: {
488 break;
489 }
490 case LEX_CHAR_LOWERCASE_P:
491 case LEX_CHAR_UPPERCASE_P: {
492 ParseUnicodePropertyEscape();
493 break;
494 }
495 case LEX_CHAR_LOWERCASE_C: {
496 cp = Peek();
497 if ((cp < LEX_CHAR_LOWERCASE_A || cp > LEX_CHAR_LOWERCASE_Z) &&
498 (cp < LEX_CHAR_UPPERCASE_A || cp > LEX_CHAR_UPPERCASE_Z)) {
499 ThrowError("Invalid control escape");
500 }
501
502 Next();
503 break;
504 }
505 default: {
506 /* IdentityEscape */
507 if (Unicode() && !IsSyntaxCharacter(cp) && cp != LEX_CHAR_SLASH) {
508 ThrowError("Invalid escape");
509 }
510 }
511 }
512 }
513
ParseDecimalEscape()514 uint32_t RegExpParser::ParseDecimalEscape()
515 {
516 ASSERT(IsDecimalDigit(Peek()));
517
518 auto digitStart = iter_;
519 uint32_t decimalValue = DigitValue(Next());
520 if (decimalValue == 0) {
521 if (!IsDecimalDigit(Peek())) {
522 /* \0 */
523 return decimalValue;
524 }
525
526 if (Unicode()) {
527 ThrowError("Invalid decimal escape");
528 }
529
530 iter_ = digitStart;
531 return ParseLegacyOctalEscape();
532 }
533
534 constexpr auto MULTIPLIER = 10;
535
536 while (IsDecimalDigit(Peek())) {
537 uint32_t newValue = decimalValue * MULTIPLIER + DigitValue(Next());
538 if (newValue < decimalValue) {
539 ThrowError("Invalid decimal escape");
540 }
541
542 decimalValue = newValue;
543 }
544
545 if (decimalValue <= capturingGroupCount_) {
546 return decimalValue;
547 }
548
549 if (Unicode()) {
550 ThrowError("Invalid decimal escape");
551 }
552
553 iter_ = digitStart;
554
555 if (!IsOctalDigit(Peek())) {
556 /* \8 or \9 */
557 return DigitValue(Next());
558 }
559
560 return ParseLegacyOctalEscape();
561 }
562
ParseLegacyOctalEscape()563 uint32_t RegExpParser::ParseLegacyOctalEscape()
564 {
565 ASSERT(IsOctalDigit(Peek()));
566 uint32_t octalValue = DigitValue(Next());
567
568 if (!IsOctalDigit(Peek())) {
569 return octalValue;
570 }
571
572 octalValue = octalValue * 8 + DigitValue(Next());
573
574 if (!IsOctalDigit(Peek())) {
575 return octalValue;
576 }
577
578 // 8 is to left shift octalValue by three bits
579 uint32_t newValue = octalValue * 8 + DigitValue(Peek());
580 constexpr uint32_t MAX_OCTAL_VALUE = 0xFF;
581
582 if (newValue <= MAX_OCTAL_VALUE) {
583 octalValue = newValue;
584 Next();
585 }
586
587 return octalValue;
588 }
589
ParseHexEscape()590 uint32_t RegExpParser::ParseHexEscape()
591 {
592 // two hexadecimal digits after x in the regular expression
593 char32_t digit = Next();
594 if (!IsHexDigit(digit)) {
595 ThrowError("Invalid hex escape");
596 }
597
598 constexpr auto MULTIPLIER = 16;
599 uint32_t cpValue = HexValue(digit) * MULTIPLIER;
600
601 digit = Next();
602 if (!IsHexDigit(digit)) {
603 ThrowError("Invalid hex escape");
604 }
605
606 cpValue += HexValue(digit);
607 return cpValue;
608 }
609
ParseUnicodeDigits()610 uint32_t RegExpParser::ParseUnicodeDigits()
611 {
612 uint32_t value = 0;
613 uint32_t count = 4;
614
615 while (count--) {
616 char32_t digit = Next();
617 if (!IsHexDigit(digit)) {
618 ThrowError("Invalid Unicode escape");
619 }
620
621 constexpr auto MULTIPLIER = 16;
622 value = value * MULTIPLIER + HexValue(digit);
623 }
624
625 return value;
626 }
627
ParseUnicodeEscape()628 uint32_t RegExpParser::ParseUnicodeEscape()
629 {
630 uint32_t value = 0;
631
632 if (Peek() == LEX_CHAR_LEFT_BRACE) {
633 Next();
634 if (!IsHexDigit(Peek())) {
635 ThrowError("Invalid Unicode escape");
636 }
637
638 while (IsHexDigit(Peek())) {
639 constexpr auto MULTIPLIER = 16;
640 value = value * MULTIPLIER + HexValue(Next());
641 constexpr uint32_t CODE_POINT_MAX = 0x10FFFF;
642
643 if (value > CODE_POINT_MAX) {
644 ThrowError("Invalid Unicode escape");
645 }
646 }
647
648 if (Peek() != LEX_CHAR_RIGHT_BRACE) {
649 ThrowError("Invalid Unicode escape");
650 }
651
652 Next();
653 } else {
654 value = ParseUnicodeDigits();
655 if (Unicode() && util::StringView::IsHighSurrogate(value)) {
656 auto pos = iter_;
657
658 if (Next() == LEX_CHAR_BACKSLASH && Next() == LEX_CHAR_LOWERCASE_U) {
659 uint32_t next = ParseUnicodeDigits();
660 if (util::StringView::IsLowSurrogate(next)) {
661 return util::StringView::DecodeSurrogates(value, next);
662 }
663 }
664
665 iter_ = pos;
666 }
667 }
668
669 return value;
670 }
671
ParseUnicodePropertyEscape()672 void RegExpParser::ParseUnicodePropertyEscape()
673 {
674 if (!Unicode()) {
675 return;
676 }
677
678 if (Peek() != LEX_CHAR_LEFT_BRACE) {
679 ThrowError("Invalid Unicode property escape");
680 }
681
682 Next();
683
684 while (true) {
685 if (!iter_.HasNext()) {
686 ThrowError("Unterminated Unicode property escape");
687 }
688
689 char32_t ch = Next();
690 if (ch == LEX_CHAR_RIGHT_BRACE) {
691 break;
692 }
693
694 /* TODO(dbatyai): Parse and valide Unicode property names */
695 }
696 }
697
ParseNamedBackreference()698 void RegExpParser::ParseNamedBackreference()
699 {
700 if (groupNames_.empty()) {
701 /* Identity escape */
702 return;
703 }
704
705 if (Next() != LEX_CHAR_LESS_THAN) {
706 ThrowError("Invalid named backreference");
707 }
708
709 util::StringView name = ParseIdent();
710 namedGroupReferences_.insert(name);
711 }
712
ValidateNamedGroupReferences()713 void RegExpParser::ValidateNamedGroupReferences()
714 {
715 for (auto& ref : namedGroupReferences_) {
716 auto result = groupNames_.find(ref);
717 if (result == groupNames_.end()) {
718 ThrowError("Invalid named capture referenced");
719 }
720 }
721 }
722
ParseQuantifier()723 void RegExpParser::ParseQuantifier()
724 {
725 switch (Peek()) {
726 case LEX_CHAR_ASTERISK:
727 case LEX_CHAR_PLUS:
728 case LEX_CHAR_QUESTION: {
729 Next();
730 break;
731 }
732 case LEX_CHAR_LEFT_BRACE: {
733 if (!ParseBracedQuantifier()) {
734 return;
735 }
736
737 break;
738 }
739 default: {
740 return;
741 }
742 }
743
744 if (Peek() == LEX_CHAR_QUESTION) {
745 Next();
746 }
747 }
748
ParseBracedQuantifier()749 bool RegExpParser::ParseBracedQuantifier()
750 {
751 if (Peek() != LEX_CHAR_LEFT_BRACE) {
752 return false;
753 }
754
755 auto startPos = iter_;
756 Next();
757
758 if (!IsDecimalDigit(Peek())) {
759 iter_ = startPos;
760 return false;
761 }
762
763 uint32_t leftValue = 0;
764 constexpr auto MULTIPLIER = 10;
765
766 while (IsDecimalDigit(Peek())) {
767 uint32_t newValue = leftValue * MULTIPLIER + DigitValue(Next());
768 if (newValue < leftValue) {
769 leftValue = std::numeric_limits<uint32_t>::max();
770 continue;
771 }
772
773 leftValue = newValue;
774 }
775
776 if (Peek() == LEX_CHAR_COMMA) {
777 Next();
778 }
779
780 if (Peek() == LEX_CHAR_RIGHT_BRACE) {
781 Next();
782 return true;
783 }
784
785 if (IsDecimalDigit(Peek())) {
786 uint32_t rightValue = 0;
787 while (IsDecimalDigit(Peek())) {
788 uint32_t newValue = rightValue * MULTIPLIER + DigitValue(Next());
789 if (newValue < rightValue) {
790 rightValue = std::numeric_limits<uint32_t>::max();
791 continue;
792 }
793
794 rightValue = newValue;
795 }
796
797 if (Peek() == LEX_CHAR_RIGHT_BRACE) {
798 if (rightValue < leftValue) {
799 ThrowError("Quantifier range out of order");
800 }
801
802 Next();
803 return true;
804 }
805 }
806
807 iter_ = startPos;
808 return false;
809 }
810
ParsePatternCharacter()811 bool RegExpParser::ParsePatternCharacter()
812 {
813 char32_t cp = Peek();
814 if (IsSyntaxCharacter(cp)) {
815 return false;
816 }
817
818 Next();
819 return true;
820 }
821
IsIdStart(uint32_t cp)822 static bool IsIdStart(uint32_t cp)
823 {
824 auto uchar = static_cast<UChar32>(cp);
825 return u_isIDStart(uchar) != 0 || cp == LEX_CHAR_DOLLAR_SIGN || cp == LEX_CHAR_UNDERSCORE;
826 }
827
IsIdCont(uint32_t cp)828 static bool IsIdCont(uint32_t cp)
829 {
830 auto uchar = static_cast<UChar32>(cp);
831 return u_isIDPart(uchar) != 0 || cp == LEX_CHAR_DOLLAR_SIGN || cp == LEX_CHAR_ZWNJ || cp == LEX_CHAR_ZWJ;
832 }
833
ParseIdent()834 util::StringView RegExpParser::ParseIdent()
835 {
836 char32_t cp = Next();
837 if (cp == LEX_CHAR_BACKSLASH) {
838 if (Next() != LEX_CHAR_LOWERCASE_U) {
839 ThrowError("Invalid group name");
840 }
841
842 if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
843 ThrowError("Invalid Unicode escape");
844 }
845
846 cp = ParseUnicodeEscape();
847 }
848
849 if (!IsIdStart(cp)) {
850 ThrowError("Invalid group name");
851 }
852
853 util::UString ident(allocator_);
854 ident.Append(cp);
855
856 while (true) {
857 cp = Next();
858 if (cp == LEX_CHAR_GREATER_THAN) {
859 break;
860 }
861
862 if (cp == LEX_CHAR_BACKSLASH) {
863 if (Next() != LEX_CHAR_LOWERCASE_U) {
864 ThrowError("Invalid group name");
865 }
866
867 if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
868 ThrowError("Invalid Unicode escape");
869 }
870
871 cp = ParseUnicodeEscape();
872 }
873
874 if (!IsIdCont(cp)) {
875 ThrowError("Invalid group name");
876 }
877
878 ident.Append(cp);
879 }
880
881 return ident.View();
882 }
883
884 } // namespace panda::es2panda::lexer
885