1 /**
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
#include "regexp.h"

#include <lexer/token/letters.h>
#include <unicode/uchar.h>

#include <iostream>
#include <limits>
22
23 namespace panda::es2panda::lexer {
24
RegExpError(std::string_view m)25 RegExpError::RegExpError(std::string_view m) : message(m) {}
26
RegExp(util::StringView p,util::StringView f,RegExpFlags reFlags)27 RegExp::RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags) : patternStr(p), flagsStr(f), flags(reFlags)
28 {
29 }
30
RegExpParser(const RegExp & re,ArenaAllocator * allocator)31 RegExpParser::RegExpParser(const RegExp &re, ArenaAllocator *allocator)
32 : re_(re), allocator_ {allocator}, iter_(re_.patternStr), capturingGroupCount_(0)
33 {
34 }
35
Unicode() const36 bool RegExpParser::Unicode() const
37 {
38 return (re_.flags & RegExpFlags::UNICODE) != 0;
39 }
40
Peek() const41 char32_t RegExpParser::Peek() const
42 {
43 return iter_.Peek();
44 }
45
Next()46 char32_t RegExpParser::Next()
47 {
48 return iter_.Next();
49 }
50
IsDecimalDigit(char32_t cp)51 static bool IsDecimalDigit(char32_t cp)
52 {
53 return (cp >= LEX_CHAR_0 && cp <= LEX_CHAR_9);
54 }
55
IsOctalDigit(char32_t cp)56 static bool IsOctalDigit(char32_t cp)
57 {
58 return (cp >= LEX_CHAR_0 && cp <= LEX_CHAR_7);
59 }
60
IsHexDigit(char32_t cp)61 static bool IsHexDigit(char32_t cp)
62 {
63 return IsDecimalDigit(cp) || (cp >= LEX_CHAR_LOWERCASE_A && cp <= LEX_CHAR_LOWERCASE_F) ||
64 (cp >= LEX_CHAR_UPPERCASE_A && cp <= LEX_CHAR_UPPERCASE_F);
65 }
66
DigitValue(char32_t cp)67 static uint32_t DigitValue(char32_t cp)
68 {
69 return (cp - LEX_CHAR_0);
70 }
71
HexValue(char32_t cp)72 static uint32_t HexValue(char32_t cp)
73 {
74 if (IsDecimalDigit(cp)) {
75 return DigitValue(cp);
76 }
77
78 constexpr auto OFFSET = 10;
79
80 if (cp < LEX_CHAR_LOWERCASE_A) {
81 return cp - LEX_CHAR_UPPERCASE_A + OFFSET;
82 }
83
84 return (cp - LEX_CHAR_LOWERCASE_A + OFFSET);
85 }
86
ThrowError(std::string_view message)87 static void ThrowError(std::string_view message)
88 {
89 throw RegExpError(message);
90 }
91
ParsePattern()92 void RegExpParser::ParsePattern()
93 {
94 ParseDisjunction();
95
96 if (iter_.HasNext()) {
97 ThrowError("Invalid closing parenthesis");
98 }
99 ValidateNamedGroupReferences();
100 }
101
ParseDisjunction()102 void RegExpParser::ParseDisjunction()
103 {
104 while (true) {
105 ParseAlternatives();
106
107 if (Peek() != LEX_CHAR_VLINE) {
108 break;
109 }
110
111 Next();
112 };
113 }
114
ParseAlternative()115 void RegExpParser::ParseAlternative()
116 {
117 switch (Peek()) {
118 case LEX_CHAR_BACKSLASH: {
119 Next();
120 char32_t cp = Peek();
121 if (cp == LEX_CHAR_LOWERCASE_B || cp == LEX_CHAR_UPPERCASE_B) {
122 /* assertion */
123 Next();
124 return;
125 }
126
127 ParseAtomEscape();
128 break;
129 }
130 case LEX_CHAR_CIRCUMFLEX:
131 case LEX_CHAR_DOLLAR_SIGN: {
132 /* assertion */
133 Next();
134 return;
135 }
136 case LEX_CHAR_LEFT_PAREN: {
137 Next();
138
139 if (Peek() != LEX_CHAR_QUESTION) {
140 ParseCapturingGroup();
141 break;
142 }
143
144 Next(); // eat '?'
145
146 char32_t cp = Next();
147 if (cp == LEX_CHAR_COLON) {
148 ParseNonCapturingGroup();
149 break;
150 }
151
152 if (cp == LEX_CHAR_EQUALS || cp == LEX_CHAR_EXCLAMATION) {
153 ParseAssertion();
154
155 if (Unicode()) {
156 return;
157 }
158
159 break;
160 }
161
162 if (cp != LEX_CHAR_LESS_THAN) {
163 ThrowError("Invalid group");
164 }
165
166 cp = Peek();
167 if (cp == LEX_CHAR_EQUALS || cp == LEX_CHAR_EXCLAMATION) {
168 Next();
169 ParseAssertion();
170 return;
171 }
172
173 ParseNamedCapturingGroup();
174 break;
175 }
176 case LEX_CHAR_LEFT_SQUARE: {
177 Next();
178 ParseCharacterClass();
179 break;
180 }
181 case LEX_CHAR_DOT: {
182 Next();
183 break;
184 }
185 default: {
186 if (ParseBracedQuantifier()) {
187 ThrowError("Invalid quantifier, nothing to repeat");
188 }
189
190 if (!ParsePatternCharacter()) {
191 ThrowError("Invalid character");
192 }
193
194 break;
195 }
196 }
197
198 ParseQuantifier();
199 }
200
ParseAlternatives()201 void RegExpParser::ParseAlternatives()
202 {
203 while (true) {
204 switch (Peek()) {
205 case util::StringView::Iterator::INVALID_CP:
206 case LEX_CHAR_RIGHT_PAREN:
207 case LEX_CHAR_VLINE: {
208 return;
209 }
210 default: {
211 ParseAlternative();
212 }
213 }
214 }
215 }
216
ParseNonCapturingGroup()217 void RegExpParser::ParseNonCapturingGroup()
218 {
219 ParseDisjunction();
220
221 if (Peek() != LEX_CHAR_RIGHT_PAREN) {
222 ThrowError("Invalid non-capturing group");
223 }
224
225 Next();
226 }
227
ParseNamedCapturingGroup()228 void RegExpParser::ParseNamedCapturingGroup()
229 {
230 util::StringView name = ParseIdent();
231
232 auto result = groupNames_.insert(name);
233 if (!result.second) {
234 ThrowError("Duplicate group name");
235 }
236
237 ParseCapturingGroup();
238 }
239
ParseCapturingGroup()240 void RegExpParser::ParseCapturingGroup()
241 {
242 capturingGroupCount_++;
243
244 ParseDisjunction();
245
246 if (Peek() != LEX_CHAR_RIGHT_PAREN) {
247 ThrowError("Invalid capturing group");
248 }
249
250 Next();
251 }
252
ParseAssertion()253 void RegExpParser::ParseAssertion()
254 {
255 ParseDisjunction();
256
257 if (Peek() != LEX_CHAR_RIGHT_PAREN) {
258 ThrowError("Invalid assertion");
259 }
260
261 Next();
262 }
263
ParseControlEscape()264 uint32_t RegExpParser::ParseControlEscape()
265 {
266 char32_t cp = Peek();
267 if ((cp < LEX_CHAR_LOWERCASE_A || cp > LEX_CHAR_LOWERCASE_Z) &&
268 (cp < LEX_CHAR_UPPERCASE_A || cp > LEX_CHAR_UPPERCASE_Z)) {
269 if (Unicode()) {
270 ThrowError("Invalid control escape");
271 }
272
273 if (cp < LEX_CHAR_0 || cp > LEX_CHAR_9) {
274 return LEX_CHAR_LOWERCASE_C;
275 }
276 }
277
278 Next();
279 constexpr auto MODULO = 32;
280 return cp % MODULO;
281 }
282
ParseClassAtom()283 char32_t RegExpParser::ParseClassAtom()
284 {
285 char32_t cp = Next();
286 if (cp != LEX_CHAR_BACKSLASH) {
287 return cp;
288 }
289
290 cp = Peek();
291 if (cp == LEX_CHAR_0) {
292 if (!Unicode()) {
293 return ParseDecimalEscape();
294 }
295
296 Next();
297
298 if (IsDecimalDigit(Peek())) {
299 ThrowError("Invalid class escape");
300 }
301
302 return LEX_CHAR_NULL;
303 }
304
305 Next();
306
307 switch (cp) {
308 case LEX_CHAR_LOWERCASE_C: {
309 return ParseControlEscape();
310 }
311 case LEX_CHAR_LOWERCASE_X: {
312 return ParseHexEscape();
313 }
314 case LEX_CHAR_LOWERCASE_U: {
315 if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
316 return cp;
317 }
318
319 return ParseUnicodeEscape();
320 }
321 case LEX_CHAR_LOWERCASE_P:
322 case LEX_CHAR_UPPERCASE_P: {
323 if (!Unicode()) {
324 return cp;
325 }
326
327 ParseUnicodePropertyEscape();
328 [[fallthrough]];
329 }
330 case LEX_CHAR_LOWERCASE_D:
331 case LEX_CHAR_UPPERCASE_D:
332 case LEX_CHAR_LOWERCASE_S:
333 case LEX_CHAR_UPPERCASE_S:
334 case LEX_CHAR_LOWERCASE_W:
335 case LEX_CHAR_UPPERCASE_W: {
336 return std::numeric_limits<uint32_t>::max();
337 }
338 case LEX_CHAR_LOWERCASE_B: {
339 return LEX_CHAR_BS;
340 }
341 case LEX_CHAR_LOWERCASE_F: {
342 return LEX_CHAR_FF;
343 }
344 case LEX_CHAR_LOWERCASE_N: {
345 return LEX_CHAR_LF;
346 }
347 case LEX_CHAR_LOWERCASE_R: {
348 return LEX_CHAR_CR;
349 }
350 case LEX_CHAR_LOWERCASE_T: {
351 return LEX_CHAR_TAB;
352 }
353 case LEX_CHAR_LOWERCASE_V: {
354 return LEX_CHAR_VT;
355 }
356 case LEX_CHAR_MINUS: {
357 return cp;
358 }
359 default: {
360 if (Unicode() && !IsSyntaxCharacter(cp) && cp != LEX_CHAR_SLASH) {
361 ThrowError("Invalid escape");
362 }
363
364 return cp;
365 }
366 }
367
368 return cp;
369 }
370
// True when `cp` is the marker value ParseClassAtom() returns for character class escapes,
// i.e. the largest uint32_t value.
static bool IsClassEscape(uint32_t cp)
{
    constexpr uint32_t CLASS_ESCAPE_MARKER = std::numeric_limits<uint32_t>::max();
    return cp == CLASS_ESCAPE_MARKER;
}
375
ParseCharacterClass()376 void RegExpParser::ParseCharacterClass()
377 {
378 if (Peek() == LEX_CHAR_CIRCUMFLEX) {
379 Next();
380 }
381
382 while (true) {
383 if (Peek() == LEX_CHAR_RIGHT_SQUARE) {
384 Next();
385 break;
386 }
387
388 uint32_t left = ParseClassAtom();
389
390 if (Peek() != LEX_CHAR_MINUS) {
391 continue;
392 }
393
394 Next();
395
396 if (Peek() == LEX_CHAR_RIGHT_SQUARE) {
397 Next();
398 break;
399 }
400
401 uint32_t right = ParseClassAtom();
402 if ((IsClassEscape(left) || IsClassEscape(right))) {
403 if (Unicode()) {
404 ThrowError("Invalid character class");
405 }
406
407 continue;
408 }
409
410 if (left > right) {
411 ThrowError("Class range out of order");
412 }
413 }
414 }
415
IsSyntaxCharacter(char32_t cp) const416 bool RegExpParser::IsSyntaxCharacter(char32_t cp) const
417 {
418 switch (cp) {
419 case LEX_CHAR_RIGHT_SQUARE:
420 case LEX_CHAR_LEFT_BRACE:
421 case LEX_CHAR_RIGHT_BRACE: {
422 if (!Unicode()) {
423 return false;
424 }
425
426 [[fallthrough]];
427 }
428 case LEX_CHAR_CIRCUMFLEX:
429 case LEX_CHAR_DOLLAR_SIGN:
430 case LEX_CHAR_BACKSLASH:
431 case LEX_CHAR_DOT:
432 case LEX_CHAR_ASTERISK:
433 case LEX_CHAR_PLUS:
434 case LEX_CHAR_QUESTION:
435 case LEX_CHAR_LEFT_PAREN:
436 case LEX_CHAR_RIGHT_PAREN:
437 case LEX_CHAR_LEFT_SQUARE:
438 case LEX_CHAR_VLINE: {
439 return true;
440 }
441 default: {
442 return false;
443 }
444 }
445 }
446
ParseAtomEscape()447 void RegExpParser::ParseAtomEscape()
448 {
449 char32_t cp = Peek();
450 if (IsDecimalDigit(cp)) {
451 ParseDecimalEscape();
452 return;
453 }
454
455 Next();
456
457 switch (cp) {
458 case LEX_CHAR_LOWERCASE_X: {
459 if (Unicode()) {
460 ParseHexEscape();
461 }
462 break;
463 }
464 case LEX_CHAR_LOWERCASE_U: {
465 if (Unicode()) {
466 ParseUnicodeEscape();
467 }
468 break;
469 }
470 case LEX_CHAR_LOWERCASE_K: {
471 ParseNamedBackreference();
472 break;
473 }
474 /* ControlEscape */
475 case LEX_CHAR_LOWERCASE_F:
476 case LEX_CHAR_LOWERCASE_N:
477 case LEX_CHAR_LOWERCASE_R:
478 case LEX_CHAR_LOWERCASE_T:
479 case LEX_CHAR_LOWERCASE_V:
480 /* CharacterClassEscape */
481 case LEX_CHAR_LOWERCASE_D:
482 case LEX_CHAR_UPPERCASE_D:
483 case LEX_CHAR_LOWERCASE_S:
484 case LEX_CHAR_UPPERCASE_S:
485 case LEX_CHAR_LOWERCASE_W:
486 case LEX_CHAR_UPPERCASE_W: {
487 break;
488 }
489 case LEX_CHAR_LOWERCASE_P:
490 case LEX_CHAR_UPPERCASE_P: {
491 ParseUnicodePropertyEscape();
492 break;
493 }
494 case LEX_CHAR_LOWERCASE_C: {
495 cp = Peek();
496 if ((cp < LEX_CHAR_LOWERCASE_A || cp > LEX_CHAR_LOWERCASE_Z) &&
497 (cp < LEX_CHAR_UPPERCASE_A || cp > LEX_CHAR_UPPERCASE_Z)) {
498 ThrowError("Invalid control escape");
499 }
500
501 Next();
502 break;
503 }
504 default: {
505 /* IdentityEscape */
506 if (Unicode() && !IsSyntaxCharacter(cp) && cp != LEX_CHAR_SLASH) {
507 ThrowError("Invalid escape");
508 }
509 }
510 }
511 }
512
ParseDecimalEscape()513 uint32_t RegExpParser::ParseDecimalEscape()
514 {
515 ASSERT(IsDecimalDigit(Peek()));
516
517 auto digitStart = iter_;
518 uint32_t decimalValue = DigitValue(Next());
519 if (decimalValue == 0) {
520 if (!IsDecimalDigit(Peek())) {
521 /* \0 */
522 return decimalValue;
523 }
524
525 if (Unicode()) {
526 ThrowError("Invalid decimal escape");
527 }
528
529 iter_ = digitStart;
530 return ParseLegacyOctalEscape();
531 }
532
533 constexpr auto MULTIPLIER = 10;
534
535 while (IsDecimalDigit(Peek())) {
536 uint32_t newValue = decimalValue * MULTIPLIER + DigitValue(Next());
537 if (newValue < decimalValue) {
538 ThrowError("Invalid decimal escape");
539 }
540
541 decimalValue = newValue;
542 }
543
544 if (decimalValue <= capturingGroupCount_) {
545 return decimalValue;
546 }
547
548 if (Unicode()) {
549 ThrowError("Invalid decimal escape");
550 }
551
552 iter_ = digitStart;
553
554 if (!IsOctalDigit(Peek())) {
555 /* \8 or \9 */
556 return DigitValue(Next());
557 }
558
559 return ParseLegacyOctalEscape();
560 }
561
ParseLegacyOctalEscape()562 uint32_t RegExpParser::ParseLegacyOctalEscape()
563 {
564 ASSERT(IsOctalDigit(Peek()));
565 uint32_t octalValue = DigitValue(Next());
566
567 if (!IsOctalDigit(Peek())) {
568 return octalValue;
569 }
570
571 octalValue = octalValue * 8 + DigitValue(Next());
572
573 if (!IsOctalDigit(Peek())) {
574 return octalValue;
575 }
576
577 uint32_t newValue = octalValue * 8 + DigitValue(Peek());
578 constexpr uint32_t MAX_OCTAL_VALUE = 0xFF;
579
580 if (newValue <= MAX_OCTAL_VALUE) {
581 octalValue = newValue;
582 Next();
583 }
584
585 return octalValue;
586 }
587
ParseHexEscape()588 uint32_t RegExpParser::ParseHexEscape()
589 {
590 // two hexadecimal digits after x in the regular expression
591 char32_t digit = Next();
592 if (!IsHexDigit(digit)) {
593 ThrowError("Invalid hex escape");
594 }
595
596 constexpr auto MULTIPLIER = 16;
597 uint32_t cpValue = HexValue(digit) * MULTIPLIER;
598
599 digit = Next();
600 if (!IsHexDigit(digit)) {
601 ThrowError("Invalid hex escape");
602 }
603
604 cpValue += HexValue(digit);
605 return cpValue;
606 }
607
ParseUnicodeDigits()608 uint32_t RegExpParser::ParseUnicodeDigits()
609 {
610 uint32_t value = 0;
611 uint32_t count = 4;
612
613 while (count--) {
614 char32_t digit = Next();
615 if (!IsHexDigit(digit)) {
616 ThrowError("Invalid Unicode escape");
617 }
618
619 constexpr auto MULTIPLIER = 16;
620 value = value * MULTIPLIER + HexValue(digit);
621 }
622
623 return value;
624 }
625
ParseUnicodeEscape()626 uint32_t RegExpParser::ParseUnicodeEscape()
627 {
628 uint32_t value = 0;
629
630 if (Peek() == LEX_CHAR_LEFT_BRACE) {
631 Next();
632 if (!IsHexDigit(Peek())) {
633 ThrowError("Invalid Unicode escape");
634 }
635
636 while (IsHexDigit(Peek())) {
637 constexpr auto MULTIPLIER = 16;
638 value = value * MULTIPLIER + HexValue(Next());
639 constexpr uint32_t CODE_POINT_MAX = 0x10FFFF;
640
641 if (value > CODE_POINT_MAX) {
642 ThrowError("Invalid Unicode escape");
643 }
644 }
645
646 if (Peek() != LEX_CHAR_RIGHT_BRACE) {
647 ThrowError("Invalid Unicode escape");
648 }
649
650 Next();
651 } else {
652 value = ParseUnicodeDigits();
653 if (Unicode() && util::StringView::IsHighSurrogate(value)) {
654 auto pos = iter_;
655
656 if (Next() == LEX_CHAR_BACKSLASH && Next() == LEX_CHAR_LOWERCASE_U) {
657 uint32_t next = ParseUnicodeDigits();
658 if (util::StringView::IsLowSurrogate(next)) {
659 return util::StringView::DecodeSurrogates(value, next);
660 }
661 }
662
663 iter_ = pos;
664 }
665 }
666
667 return value;
668 }
669
ParseUnicodePropertyEscape()670 void RegExpParser::ParseUnicodePropertyEscape()
671 {
672 if (!Unicode()) {
673 return;
674 }
675
676 if (Peek() != LEX_CHAR_LEFT_BRACE) {
677 ThrowError("Invalid Unicode property escape");
678 }
679
680 Next();
681
682 while (true) {
683 if (!iter_.HasNext()) {
684 ThrowError("Unterminated Unicode property escape");
685 }
686
687 char32_t ch = Next();
688 if (ch == LEX_CHAR_RIGHT_BRACE) {
689 break;
690 }
691
692 /* TODO(dbatyai): Parse and valide Unicode property names */
693 }
694 }
695
ParseNamedBackreference()696 void RegExpParser::ParseNamedBackreference()
697 {
698 if (groupNames_.empty()) {
699 /* Identity escape */
700 return;
701 }
702
703 if (Next() != LEX_CHAR_LESS_THAN) {
704 ThrowError("Invalid named backreference");
705 }
706
707 util::StringView name = ParseIdent();
708 namedGroupReferences_.insert(name);
709 }
710
ValidateNamedGroupReferences()711 void RegExpParser::ValidateNamedGroupReferences()
712 {
713 for (auto& ref : namedGroupReferences_) {
714 auto result = groupNames_.find(ref);
715 if (result == groupNames_.end()) {
716 ThrowError("Invalid named capture referenced");
717 }
718 }
719 }
720
ParseQuantifier()721 void RegExpParser::ParseQuantifier()
722 {
723 switch (Peek()) {
724 case LEX_CHAR_ASTERISK:
725 case LEX_CHAR_PLUS:
726 case LEX_CHAR_QUESTION: {
727 Next();
728 break;
729 }
730 case LEX_CHAR_LEFT_BRACE: {
731 if (!ParseBracedQuantifier()) {
732 return;
733 }
734
735 break;
736 }
737 default: {
738 return;
739 }
740 }
741
742 if (Peek() == LEX_CHAR_QUESTION) {
743 Next();
744 }
745 }
746
ParseBracedQuantifier()747 bool RegExpParser::ParseBracedQuantifier()
748 {
749 if (Peek() != LEX_CHAR_LEFT_BRACE) {
750 return false;
751 }
752
753 auto startPos = iter_;
754 Next();
755
756 if (!IsDecimalDigit(Peek())) {
757 iter_ = startPos;
758 return false;
759 }
760
761 uint32_t leftValue = 0;
762 constexpr auto MULTIPLIER = 10;
763
764 while (IsDecimalDigit(Peek())) {
765 uint32_t newValue = leftValue * MULTIPLIER + DigitValue(Next());
766 if (newValue < leftValue) {
767 leftValue = std::numeric_limits<uint32_t>::max();
768 continue;
769 }
770
771 leftValue = newValue;
772 }
773
774 if (Peek() == LEX_CHAR_COMMA) {
775 Next();
776 }
777
778 if (Peek() == LEX_CHAR_RIGHT_BRACE) {
779 Next();
780 return true;
781 }
782
783 if (IsDecimalDigit(Peek())) {
784 uint32_t rightValue = 0;
785 while (IsDecimalDigit(Peek())) {
786 uint32_t newValue = rightValue * MULTIPLIER + DigitValue(Next());
787 if (newValue < rightValue) {
788 rightValue = std::numeric_limits<uint32_t>::max();
789 continue;
790 }
791
792 rightValue = newValue;
793 }
794
795 if (Peek() == LEX_CHAR_RIGHT_BRACE) {
796 if (rightValue < leftValue) {
797 ThrowError("Quantifier range out of order");
798 }
799
800 Next();
801 return true;
802 }
803 }
804
805 iter_ = startPos;
806 return false;
807 }
808
ParsePatternCharacter()809 bool RegExpParser::ParsePatternCharacter()
810 {
811 char32_t cp = Peek();
812 if (IsSyntaxCharacter(cp)) {
813 return false;
814 }
815
816 Next();
817 return true;
818 }
819
IsIdStart(uint32_t cp)820 static bool IsIdStart(uint32_t cp)
821 {
822 auto uchar = static_cast<UChar32>(cp);
823 return u_isIDStart(uchar) != 0 || cp == LEX_CHAR_DOLLAR_SIGN || cp == LEX_CHAR_UNDERSCORE;
824 }
825
IsIdCont(uint32_t cp)826 static bool IsIdCont(uint32_t cp)
827 {
828 auto uchar = static_cast<UChar32>(cp);
829 return u_isIDPart(uchar) != 0 || cp == LEX_CHAR_DOLLAR_SIGN || cp == LEX_CHAR_ZWNJ || cp == LEX_CHAR_ZWJ;
830 }
831
ParseIdent()832 util::StringView RegExpParser::ParseIdent()
833 {
834 char32_t cp = Next();
835 if (cp == LEX_CHAR_BACKSLASH) {
836 if (Next() != LEX_CHAR_LOWERCASE_U) {
837 ThrowError("Invalid group name");
838 }
839
840 if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
841 ThrowError("Invalid Unicode escape");
842 }
843
844 cp = ParseUnicodeEscape();
845 }
846
847 if (!IsIdStart(cp)) {
848 ThrowError("Invalid group name");
849 }
850
851 util::UString ident(allocator_);
852 ident.Append(cp);
853
854 while (true) {
855 cp = Next();
856 if (cp == LEX_CHAR_GREATER_THAN) {
857 break;
858 }
859
860 if (cp == LEX_CHAR_BACKSLASH) {
861 if (Next() != LEX_CHAR_LOWERCASE_U) {
862 ThrowError("Invalid group name");
863 }
864
865 if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
866 ThrowError("Invalid Unicode escape");
867 }
868
869 cp = ParseUnicodeEscape();
870 }
871
872 if (!IsIdCont(cp)) {
873 ThrowError("Invalid group name");
874 }
875
876 ident.Append(cp);
877 }
878
879 return ident.View();
880 }
881
882 } // namespace panda::es2panda::lexer
883