• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/regexp/regexp_parser.h"
17 
18 #include "ecmascript/base/string_helper.h"
19 #include "ecmascript/ecma_macros.h"
20 #include "ecmascript/regexp/regexp_opcode.h"
21 #include "libpandabase/utils/utils.h"
22 #include "securec.h"
23 #include "unicode/uniset.h"
24 
25 #define _NO_DEBUG_
26 
27 namespace panda::ecmascript {
28 static RangeSet g_rangeD(0x30, 0x39);  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
29 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
30 static RangeSet g_rangeS({
31     std::pair<uint32_t, uint32_t>(0x0009, 0x000D),  // NOLINTNEXTLINE(readability-magic-numbers)
32     std::pair<uint32_t, uint32_t>(0x0020, 0x0020),  // NOLINTNEXTLINE(readability-magic-numbers)
33     std::pair<uint32_t, uint32_t>(0x00A0, 0x00A0),  // NOLINTNEXTLINE(readability-magic-numbers)
34     std::pair<uint32_t, uint32_t>(0x1680, 0x1680),  // NOLINTNEXTLINE(readability-magic-numbers)
35     std::pair<uint32_t, uint32_t>(0x2000, 0x200A),  // NOLINTNEXTLINE(readability-magic-numbers)
36     /* 2028;LINE SEPARATOR;Zl;0;WS;;;;;N;;;;; */
37     /* 2029;PARAGRAPH SEPARATOR;Zp;0;B;;;;;N;;;;; */
38     std::pair<uint32_t, uint32_t>(0x2028, 0x2029),  // NOLINTNEXTLINE(readability-magic-numbers)
39     std::pair<uint32_t, uint32_t>(0x202F, 0x202F),  // NOLINTNEXTLINE(readability-magic-numbers)
40     std::pair<uint32_t, uint32_t>(0x205F, 0x205F),  // NOLINTNEXTLINE(readability-magic-numbers)
41     std::pair<uint32_t, uint32_t>(0x3000, 0x3000),  // NOLINTNEXTLINE(readability-magic-numbers)
42     /* FEFF;ZERO WIDTH NO-BREAK SPACE;Cf;0;BN;;;;;N;BYTE ORDER MARK;;;; */
43     std::pair<uint32_t, uint32_t>(0xFEFF, 0xFEFF),  // NOLINTNEXTLINE(readability-magic-numbers)
44 });
45 
46 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
47 static RangeSet g_rangeW({
48     std::pair<uint32_t, uint32_t>(0x0030, 0x0039),  // NOLINTNEXTLINE(readability-magic-numbers)
49     std::pair<uint32_t, uint32_t>(0x0041, 0x005A),  // NOLINTNEXTLINE(readability-magic-numbers)
50     std::pair<uint32_t, uint32_t>(0x005F, 0x005F),  // NOLINTNEXTLINE(readability-magic-numbers)
51     std::pair<uint32_t, uint32_t>(0x0061, 0x007A),  // NOLINTNEXTLINE(readability-magic-numbers)
52 });
53 
54 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
55 static RangeSet g_regexpIdentifyStart({
56     std::pair<uint32_t, uint32_t>(0x0024, 0x0024),  // NOLINTNEXTLINE(readability-magic-numbers)
57     std::pair<uint32_t, uint32_t>(0x0041, 0x005A),  // NOLINTNEXTLINE(readability-magic-numbers)
58     std::pair<uint32_t, uint32_t>(0x0061, 0x007A),  // NOLINTNEXTLINE(readability-magic-numbers)
59 });
60 
61 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
62 static RangeSet g_regexpIdentifyContinue({
63     std::pair<uint32_t, uint32_t>(0x0024, 0x0024),  // NOLINTNEXTLINE(readability-magic-numbers)
64     std::pair<uint32_t, uint32_t>(0x0030, 0x0039),  // NOLINTNEXTLINE(readability-magic-numbers)
65     std::pair<uint32_t, uint32_t>(0x0041, 0x005A),  // NOLINTNEXTLINE(readability-magic-numbers)
66     std::pair<uint32_t, uint32_t>(0x0061, 0x007A),  // NOLINTNEXTLINE(readability-magic-numbers)
67 });
68 
Parse()69 void RegExpParser::Parse()
70 {
71     // dynbuffer head init [size,capture_count,statck_count,flags]
72     buffer_.EmitU32(0);
73     buffer_.EmitU32(0);
74     buffer_.EmitU32(0);
75     buffer_.EmitU32(0);
76     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
77     PrintF("Parse Pattern------\n");
78     // Pattern[U, N]::
79     //      Disjunction[?U, ?N]
80     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
81     Advance();
82     SaveStartOpCode saveStartOp;
83     int captureIndex = captureCount_++;
84     saveStartOp.EmitOpCode(&buffer_, captureIndex);
85     ParseDisjunction(false);
86     if (c0_ != KEY_EOF) {
87         ParseError("extraneous characters at the end");
88         return;
89     }
90     SaveEndOpCode saveEndOp;
91     saveEndOp.EmitOpCode(&buffer_, captureIndex);
92     MatchEndOpCode matchEndOp;
93     matchEndOp.EmitOpCode(&buffer_, 0);
94     // dynbuffer head assignments
95     buffer_.PutU32(0, buffer_.size_);
96     buffer_.PutU32(NUM_CAPTURE__OFFSET, captureCount_);
97     buffer_.PutU32(NUM_STACK_OFFSET, stackCount_);
98     buffer_.PutU32(FLAGS_OFFSET, flags_);
99 #ifndef _NO_DEBUG_
100     RegExpOpCode::DumpRegExpOpCode(std::cout, buffer_);
101 #endif
102 }
103 
ParseDisjunction(bool isBackward)104 void RegExpParser::ParseDisjunction(bool isBackward)
105 {
106     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
107     PrintF("Parse Disjunction------\n");
108     size_t start = buffer_.size_;
109     ParseAlternative(isBackward);
110     if (isError_) {
111         return;
112     }
113     do {
114         if (c0_ == '|') {
115             SplitNextOpCode splitOp;
116             uint32_t len = buffer_.size_ - start;
117             GotoOpCode gotoOp;
118             splitOp.InsertOpCode(&buffer_, start, len + gotoOp.GetSize());
119             uint32_t pos = gotoOp.EmitOpCode(&buffer_, 0) - gotoOp.GetSize();
120             Advance();
121             ParseAlternative(isBackward);
122             gotoOp.UpdateOpPara(&buffer_, pos, buffer_.size_ - pos - gotoOp.GetSize());
123         }
124     } while (c0_ != KEY_EOF && c0_ != ')');
125 }
126 
ParseOctalLiteral()127 uint32_t RegExpParser::ParseOctalLiteral()
128 {
129     // For compatibility with some other browsers (not all), we parse
130     // up to three octal digits with a value below 256.
131     // ES#prod-annexB-LegacyOctalEscapeSequence
132     uint32_t value = c0_ - '0';
133     Advance();
134     if (c0_ >= '0' && c0_ <= '7') {
135         value = value * OCTAL_VALUE + c0_ - '0';
136         Advance();
137         if (value < OCTAL_VALUE_RANGE && c0_ >= '0' && c0_ <= '7') {
138             value = value * OCTAL_VALUE + c0_ - '0';
139             Advance();
140         }
141     }
142     return value;
143 }
144 
ParseUnlimitedLengthHexNumber(uint32_t maxValue,uint32_t * value)145 bool RegExpParser::ParseUnlimitedLengthHexNumber(uint32_t maxValue, uint32_t *value)
146 {
147     uint32_t x = 0;
148     int d = HexValue(c0_);
149     if (d < 0) {
150         return false;
151     }
152     while (d >= 0) {
153         if (UNLIKELY(x > (std::numeric_limits<uint32_t>::max() - d) / HEX_VALUE)) {
154             LOG_ECMA(FATAL) << "value overflow";
155             return false;
156         }
157         x = x * HEX_VALUE + d;
158         if (x > maxValue) {
159             return false;
160         }
161         Advance();
162         d = HexValue(c0_);
163     }
164     *value = x;
165     return true;
166 }
167 
168 // This parses RegExpUnicodeEscapeSequence as described in ECMA262.
ParseUnicodeEscape(uint32_t * value)169 bool RegExpParser::ParseUnicodeEscape(uint32_t *value)
170 {
171     // Accept both \uxxxx and \u{xxxxxx} (if allowed).
172     // In the latter case, the number of hex digits between { } is arbitrary.
173     // \ and u have already been read.
174     if (c0_ == '{' && IsUtf16()) {
175         uint8_t *start = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
176         Advance();
177         if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) {  // NOLINTNEXTLINE(readability-magic-numbers)
178             if (c0_ == '}') {
179                 Advance();
180                 return true;
181             }
182         }
183         pc_ = start;
184         Advance();
185         return false;
186     }
187     // \u but no {, or \u{...} escapes not allowed.
188     bool result = ParseHexEscape(UNICODE_HEX_VALUE, value);
189     if (result && IsUtf16() && U16_IS_LEAD(*value) && c0_ == '\\') {
190         // Attempt to read trail surrogate.
191         uint8_t *start = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
192         if (*pc_ == 'u') {
193             Advance(UNICODE_HEX_ADVANCE);
194             uint32_t trail;
195             if (ParseHexEscape(UNICODE_HEX_VALUE, &trail) && U16_IS_TRAIL(trail)) {
196                 *value = U16_GET_SUPPLEMENTARY((*value), (trail));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
197                 return true;
198             }
199         }
200         pc_ = start;
201         Advance();
202     }
203     return result;
204 }
205 
ParseHexEscape(int length,uint32_t * value)206 bool RegExpParser::ParseHexEscape(int length, uint32_t *value)
207 {
208     uint8_t *start = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
209     uint32_t val = 0;
210     for (int i = 0; i < length; ++i) {
211         uint32_t c = c0_;
212         int d = HexValue(c);
213         if (d < 0) {
214             pc_ = start;
215             Advance();
216             return false;
217         }
218         val = val * HEX_VALUE + d;
219         Advance();
220     }
221     *value = val;
222     return true;
223 }
224 
225 // NOLINTNEXTLINE(readability-function-size)
ParseAlternative(bool isBackward)226 void RegExpParser::ParseAlternative(bool isBackward)
227 {
228     size_t start = buffer_.size_;
229     while (c0_ != '|' && c0_ != KEY_EOF && c0_ != ')') {
230         if (isError_) {
231             return;
232         }
233         size_t atomBcStart = buffer_.GetSize();
234         int captureIndex = 0;
235         bool isAtom = false;
236         switch (c0_) {
237             case '^': {
238                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
239                 PrintF("Assertion %c line start \n", c0_);
240                 LineStartOpCode lineStartOp;
241                 lineStartOp.EmitOpCode(&buffer_, 0);
242                 Advance();
243                 break;
244             }
245             case '$': {
246                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
247                 PrintF("Assertion %c line end \n", c0_);
248                 LineEndOpCode lineEndOp;
249                 lineEndOp.EmitOpCode(&buffer_, 0);
250                 Advance();
251                 break;
252             }
253             case '\\': {
254                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
255                 PrintF("Escape %c \n", c0_);
256                 Advance();
257                 switch (c0_) {
258                     case 'b': {
259                         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
260                         PrintF("Assertion %c \n", c0_);
261                         WordBoundaryOpCode wordBoundaryOp;
262                         wordBoundaryOp.EmitOpCode(&buffer_, 0);
263                         Advance();
264                         break;
265                     }
266                     case 'B': {
267                         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
268                         PrintF("Assertion %c \n", c0_);
269                         NotWordBoundaryOpCode notWordBoundaryOp;
270                         notWordBoundaryOp.EmitOpCode(&buffer_, 0);
271                         Advance();
272                         break;
273                     }
274                     default: {
275                         isAtom = true;
276                         int atomValue = ParseAtomEscape(isBackward);
277                         if (atomValue != -1) {
278                             if (IsIgnoreCase()) {
279                                 if (!IsUtf16()) {
280                                     atomValue = Canonicalize(atomValue, false);
281                                 } else {
282                                     icu::UnicodeSet set(atomValue, atomValue);
283                                     set.closeOver(USET_CASE_INSENSITIVE);
284                                     set.removeAllStrings();
285                                     int32_t size = set.size();
286                                     RangeOpCode rangeOp;
287                                     RangeSet rangeResult;
288                                     for (int32_t idx = 0; idx < size; idx++) {
289                                         int32_t uc = set.charAt(idx);
290                                         RangeSet curRange(uc);
291                                         rangeResult.Insert(curRange);
292                                     }
293                                     rangeOp.InsertOpCode(&buffer_, rangeResult);
294                                     break;
295                                 }
296                             }
297                             if (atomValue <= UINT16_MAX) {
298                                 CharOpCode charOp;
299                                 charOp.EmitOpCode(&buffer_, atomValue);
300                             } else {
301                                 Char32OpCode charOp;
302                                 charOp.EmitOpCode(&buffer_, atomValue);
303                             }
304                         }
305                         break;
306                     }
307                 }
308                 break;
309             }
310             case '(': {
311                 Advance();
312                 isAtom = ParseAssertionCapture(&captureIndex, isBackward);
313                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
314                 Advance();
315                 break;
316             }
317             case '.': {
318                 PrevOpCode prevOp;
319                 if (isBackward) {
320                     prevOp.EmitOpCode(&buffer_, 0);
321                 }
322                 if (IsDotAll()) {
323                     AllOpCode allOp;
324                     allOp.EmitOpCode(&buffer_, 0);
325                 } else {
326                     DotsOpCode dotsOp;
327                     dotsOp.EmitOpCode(&buffer_, 0);
328                 }
329                 if (isBackward) {
330                     prevOp.EmitOpCode(&buffer_, 0);
331                 }
332                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
333                 PrintF("Atom %c match any \n", c0_);
334                 isAtom = true;
335                 Advance();
336                 break;
337             }
338             case '[': {
339                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
340                 PrintF("Atom %c match range \n", c0_);
341                 isAtom = true;
342                 PrevOpCode prevOp;
343                 Advance();
344                 if (isBackward) {
345                     prevOp.EmitOpCode(&buffer_, 0);
346                 }
347                 bool isInvert = false;
348                 if (c0_ == '^') {
349                     isInvert = true;
350                     Advance();
351                 }
352                 RangeSet rangeResult;
353                 if (!ParseClassRanges(&rangeResult)) {
354                     break;
355                 }
356                 if (isInvert) {
357                     rangeResult.Invert(IsUtf16());
358                 }
359                 uint32_t highValue = rangeResult.HighestValue();
360                 if (highValue <= UINT16_MAX) {
361                     RangeOpCode rangeOp;
362                     rangeOp.InsertOpCode(&buffer_, rangeResult);
363                 } else {
364                     Range32OpCode rangeOp;
365                     rangeOp.InsertOpCode(&buffer_, rangeResult);
366                 }
367 
368                 if (isBackward) {
369                     prevOp.EmitOpCode(&buffer_, 0);
370                 }
371                 break;
372             }
373             case '*':
374             case '+':
375             case '?': {
376                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
377                 ParseError("nothing to repeat");
378                 return;
379             }
380             case '{': {
381                 uint8_t *begin = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
382                 int dummy;
383                 if (ParserIntervalQuantifier(&dummy, &dummy)) {
384                     ParseError("nothing to repeat");
385                     return;
386                 }
387                 pc_ = begin;
388                 Advance();
389                 [[fallthrough]];
390             }
391             case '}':
392             case ']': {
393                 if (IsUtf16()) {
394                     ParseError("syntax error");
395                     return;
396                 }
397                 [[fallthrough]];
398             }
399             default: {
400                 // PatternCharacter
401                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
402                 PrintF("PatternCharacter %c\n", c0_);
403                 isAtom = true;
404                 {
405                     PrevOpCode prevOp;
406                     if (isBackward) {
407                         prevOp.EmitOpCode(&buffer_, 0);
408                     }
409                     uint32_t matchedChar = c0_;
410                     if (c0_ > (INT8_MAX + 1)) {
411                         Prev();
412                         int i = 0;
413                         UChar32 c;
414                         int32_t length = end_ - pc_ + 1;
415                         // NOLINTNEXTLINE(hicpp-signed-bitwise)
416                         U8_NEXT(pc_, i, length, c);  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
417                         matchedChar = c;
418                         pc_ += i;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
419                     }
420                     if (IsIgnoreCase()) {
421                         matchedChar = Canonicalize(matchedChar, IsUtf16());
422                     }
423                     if (matchedChar > UINT16_MAX) {
424                         Char32OpCode charOp;
425                         charOp.EmitOpCode(&buffer_, matchedChar);
426                     } else {
427                         CharOpCode charOp;
428                         charOp.EmitOpCode(&buffer_, matchedChar);
429                     }
430                     if (isBackward) {
431                         prevOp.EmitOpCode(&buffer_, 0);
432                     }
433                 }
434                 Advance();
435                 break;
436             }
437         }
438         if (isAtom && !isError_) {
439             ParseQuantifier(atomBcStart, captureIndex, captureCount_ - 1);
440         }
441         if (isBackward) {
442             size_t end = buffer_.GetSize();
443             size_t termSize = end - atomBcStart;
444             size_t moveSize = end - start;
445             buffer_.Expand(end + termSize);
446             if (memmove_s(buffer_.buf_ + start +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
447                               termSize,           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
448                           moveSize,
449                           buffer_.buf_ + start,  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
450                           moveSize) != EOK) {
451                 LOG_ECMA(FATAL) << "memmove_s failed";
452                 UNREACHABLE();
453             }
454             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
455             if (memcpy_s(buffer_.buf_ + start, termSize, buffer_.buf_ + end, termSize) != EOK) {
456                 LOG_ECMA(FATAL) << "memcpy_s failed";
457                 UNREACHABLE();
458             }
459         }
460     }
461 }
462 
FindGroupName(const CString & name)463 int RegExpParser::FindGroupName(const CString &name)
464 {
465     size_t len = 0;
466     size_t nameLen = name.size();
467     const char *p = reinterpret_cast<char *>(groupNames_.buf_);
468     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
469     const char *bufEnd = reinterpret_cast<char *>(groupNames_.buf_) + groupNames_.size_;
470     int captureIndex = 1;
471     while (p < bufEnd) {
472         len = strlen(p);
473         if (len == nameLen && memcmp(name.c_str(), p, nameLen) == 0) {
474             return captureIndex;
475         }
476         p += len + 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
477         captureIndex++;
478     }
479     return -1;
480 }
481 
ParseAssertionCapture(int * captureIndex,bool isBackward)482 bool RegExpParser::ParseAssertionCapture(int *captureIndex, bool isBackward)
483 {
484     bool isAtom = false;
485     do {
486         if (c0_ == '?') {
487             Advance();
488             switch (c0_) {
489                 // (?=Disjunction[?U, ?N])
490                 case '=': {
491                     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
492                     PrintF("Assertion(?= Disjunction)\n");
493                     Advance();
494                     uint32_t start = buffer_.size_;
495                     ParseDisjunction(isBackward);
496                     MatchOpCode matchOp;
497                     matchOp.EmitOpCode(&buffer_, 0);
498                     MatchAheadOpCode matchAheadOp;
499                     uint32_t len = buffer_.size_ - start;
500                     matchAheadOp.InsertOpCode(&buffer_, start, len);
501                     break;
502                 }
503                 // (?!Disjunction[?U, ?N])
504                 case '!': {
505                     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
506                     PrintF("Assertion(?! Disjunction)\n");
507                     uint32_t start = buffer_.size_;
508                     Advance();
509                     ParseDisjunction(isBackward);
510                     MatchOpCode matchOp;
511                     matchOp.EmitOpCode(&buffer_, 0);
512                     NegativeMatchAheadOpCode matchAheadOp;
513                     uint32_t len = buffer_.size_ - start;
514                     matchAheadOp.InsertOpCode(&buffer_, start, len);
515                     break;
516                 }
517                 case '<': {
518                     Advance();
519                     // (?<=Disjunction[?U, ?N])
520                     if (c0_ == '=') {
521                         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
522                         PrintF("Assertion(?<= Disjunction)\n");
523                         Advance();
524                         uint32_t start = buffer_.size_;
525                         ParseDisjunction(true);
526                         MatchOpCode matchOp;
527                         matchOp.EmitOpCode(&buffer_, 0);
528                         MatchAheadOpCode matchAheadOp;
529                         uint32_t len = buffer_.size_ - start;
530                         matchAheadOp.InsertOpCode(&buffer_, start, len);
531                         // (?<!Disjunction[?U, ?N])
532                     } else if (c0_ == '!') {
533                         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
534                         PrintF("Assertion(?<! Disjunction)\n");
535                         Advance();
536                         uint32_t start = buffer_.size_;
537                         ParseDisjunction(true);
538                         MatchOpCode matchOp;
539                         matchOp.EmitOpCode(&buffer_, 0);
540                         NegativeMatchAheadOpCode matchAheadOp;
541                         uint32_t len = buffer_.size_ - start;
542                         matchAheadOp.InsertOpCode(&buffer_, start, len);
543                     } else {
544                         Prev();
545                         CString name;
546                         auto **pp = const_cast<const uint8_t **>(&pc_);
547                         if (!ParseGroupSpecifier(pp, name)) {
548                             ParseError("GroupName Syntax error.");
549                             return false;
550                         }
551                         if (FindGroupName(name) > 0) {
552                             ParseError("Duplicate GroupName error.");
553                             return false;
554                         }
555                         groupNames_.EmitStr(name.c_str());
556                         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
557                         PrintF("group name %s", name.c_str());
558                         Advance();
559                         goto parseCapture;  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto)
560                     }
561                     break;
562                 }
563                 // (?:Disjunction[?U, ?N])
564                 case ':': {
565                     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
566                     PrintF("Atom(?<: Disjunction)\n");
567                     isAtom = true;
568                     Advance();
569                     ParseDisjunction(isBackward);
570                     break;
571                 }
572                 default: {
573                     Advance();
574                     ParseError("? Syntax error.");
575                     return false;
576                 }
577             }
578         } else {
579             groupNames_.EmitChar(0);
580         parseCapture:
581             isAtom = true;
582             *captureIndex = captureCount_++;
583             SaveEndOpCode saveEndOp;
584             SaveStartOpCode saveStartOp;
585             if (isBackward) {
586                 saveEndOp.EmitOpCode(&buffer_, *captureIndex);
587             } else {
588                 saveStartOp.EmitOpCode(&buffer_, *captureIndex);
589             }
590             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
591             PrintF("capture start %d \n", *captureIndex);
592             ParseDisjunction(isBackward);
593             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
594             PrintF("capture end %d \n", *captureIndex);
595             if (isBackward) {
596                 saveStartOp.EmitOpCode(&buffer_, *captureIndex);
597             } else {
598                 saveEndOp.EmitOpCode(&buffer_, *captureIndex);
599             }
600         }
601     } while (c0_ != ')' && c0_ != KEY_EOF);
602     if (c0_ != ')') {
603         ParseError("capture syntax error");
604         return false;
605     }
606     return isAtom;
607 }
608 
ParseDecimalDigits()609 int RegExpParser::ParseDecimalDigits()
610 {
611     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
612     PrintF("Parse DecimalDigits------\n");
613     int result = 0;
614     bool overflow = false;
615     while (true) {
616         if (c0_ < '0' || c0_ > '9') {
617             break;
618         }
619         if (!overflow) {
620             if (UNLIKELY(result > (INT32_MAX - c0_ + '0') / DECIMAL_DIGITS_ADVANCE)) {
621                 overflow = true;
622             } else {
623                 result = result * DECIMAL_DIGITS_ADVANCE + c0_ - '0';
624             }
625         }
626         Advance();
627     }
628     if (overflow) {
629         return INT32_MAX;
630     }
631     return result;
632 }
633 
ParserIntervalQuantifier(int * pmin,int * pmax)634 bool RegExpParser::ParserIntervalQuantifier(int *pmin, int *pmax)
635 {
636     // Quantifier::
637     //     QuantifierPrefix
638     //     QuantifierPrefix?
639     // QuantifierPrefix::
640     // *
641     // +
642     // ?
643     // {DecimalDigits}
644     // {DecimalDigits,}
645     // {DecimalDigits,DecimalDigits}
646     Advance();
647     *pmin = ParseDecimalDigits();
648     *pmax = *pmin;
649     switch (c0_) {
650         case ',': {
651             Advance();
652             if (c0_ == '}') {
653                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
654                 PrintF("QuantifierPrefix{DecimalDigits,}\n");
655                 *pmax = INT32_MAX;
656                 Advance();
657             } else {
658                 *pmax = ParseDecimalDigits();
659                 if (c0_ == '}') {
660                     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
661                     PrintF("QuantifierPrefix{DecimalDigits,DecimalDigits}\n");
662                     Advance();
663                 } else {
664                     return false;
665                 }
666             }
667             break;
668         }
669         case '}': {
670             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
671             PrintF("QuantifierPrefix{DecimalDigits}\n");
672             Advance();
673             break;
674         }
675         default: {
676             Advance();
677             return false;
678         }
679     }
680     return true;
681 }
682 
ParseQuantifier(size_t atomBcStart,int captureStart,int captureEnd)683 void RegExpParser::ParseQuantifier(size_t atomBcStart, int captureStart, int captureEnd)
684 {
685     int min = -1;
686     int max = -1;
687     bool isGreedy = true;
688     switch (c0_) {
689         case '*':
690             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
691             PrintF("QuantifierPrefix %c\n", c0_);
692             min = 0;
693             max = INT32_MAX;
694             Advance();
695             break;
696         case '+':
697             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
698             PrintF("QuantifierPrefix %c\n", c0_);
699             min = 1;
700             max = INT32_MAX;
701             Advance();
702             break;
703         case '?':
704             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
705             PrintF("QuantifierPrefix %c\n", c0_);
706             Advance();
707             min = 0;
708             max = 1;
709             break;
710         case '{': {
711             uint8_t *start = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
712             if (!ParserIntervalQuantifier(&min, &max)) {
713                 pc_ = start;
714                 Advance();  // back to '{'
715                 return;
716             }
717             if (min > max) {
718                 ParseError("Invalid repetition count");
719                 return;
720             }
721             break;
722         }
723         default:
724             break;
725     }
726     if (c0_ == '?') {
727         isGreedy = false;
728         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
729         PrintF("Quantifier::QuantifierPrefix?\n");
730         Advance();
731     } else if (c0_ == '?' || c0_ == '+' || c0_ == '*' || c0_ == '{') {
732         ParseError("nothing to repeat");
733         return;
734     }
735     if (min != -1 && max != -1) {
736         stackCount_++;
737         PushOpCode pushOp;
738         pushOp.InsertOpCode(&buffer_, atomBcStart);
739         atomBcStart += pushOp.GetSize();
740 
741         if (captureStart != 0) {
742             SaveResetOpCode saveResetOp;
743             saveResetOp.InsertOpCode(&buffer_, atomBcStart, captureStart, captureEnd);
744         }
745 
746         // zero advance check
747         if (max == INT32_MAX) {
748             stackCount_++;
749             PushCharOpCode pushCharOp;
750             pushCharOp.InsertOpCode(&buffer_, atomBcStart);
751             CheckCharOpCode checkCharOp;
752             // NOLINTNEXTLINE(readability-magic-numbers)
753             checkCharOp.EmitOpCode(&buffer_, RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_LOOP)->GetSize());
754         }
755 
756         if (isGreedy) {
757             LoopGreedyOpCode loopOp;
758             loopOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - loopOp.GetSize(), min, max);
759         } else {
760             LoopOpCode loopOp;
761             loopOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - loopOp.GetSize(), min, max);
762         }
763 
764         if (min == 0) {
765             if (isGreedy) {
766                 SplitNextOpCode splitNextOp;
767                 splitNextOp.InsertOpCode(&buffer_, atomBcStart, buffer_.GetSize() - atomBcStart);
768             } else {
769                 SplitFirstOpCode splitFirstOp;
770                 splitFirstOp.InsertOpCode(&buffer_, atomBcStart, buffer_.GetSize() - atomBcStart);
771             }
772         }
773 
774         PopOpCode popOp;
775         popOp.EmitOpCode(&buffer_);
776     }
777 }
778 
ParseGroupSpecifier(const uint8_t ** pp,CString & name)779 bool RegExpParser::ParseGroupSpecifier(const uint8_t **pp, CString &name)
780 {
781     const uint8_t *p = *pp;
782     int c = *p;
783     while (c != '>') {
784         if (c < (INT8_MAX + 1)) {
785             if (name.empty()) {
786                 if (!g_regexpIdentifyStart.IsContain(c)) {
787                     return false;
788                 }
789             } else {
790                 if (!g_regexpIdentifyContinue.IsContain(c)) {
791                     return false;
792                 }
793             }
794             name += static_cast<char>(c);
795         }
796         c = *++p;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
797     }
798     p++;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
799     *pp = p;
800     return true;
801 }
802 
ParseCaptureCount(const char * groupName)803 int RegExpParser::ParseCaptureCount(const char *groupName)
804 {
805     const uint8_t *p;
806     int captureIndex = 1;
807     CString name;
808     for (p = base_; p < end_; p++) {  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
809         switch (*p) {
810             case '(': {
811                 if (p[1] == '?') {  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
812                     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
813                     if (p[CAPTURE_CONUT_ADVANCE - 1] == '<' && p[CAPTURE_CONUT_ADVANCE] != '!' &&
814                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
815                         p[CAPTURE_CONUT_ADVANCE] != '=') {
816                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
817                         p += CAPTURE_CONUT_ADVANCE;
818                         if (groupName != nullptr) {
819                             if (ParseGroupSpecifier(&p, name)) {
820                                 if (strcmp(name.c_str(), groupName) == 0) {
821                                     return captureIndex;
822                                 }
823                             }
824                         }
825                         captureIndex++;
826                     }
827                 } else {
828                     captureIndex++;
829                 }
830                 break;
831             }
832             case '\\': {
833                 p++;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
834                 break;
835             }
836             case '[': {
837                 while (p < end_ && *p != ']') {
838                     if (*p == '\\') {
839                         p++;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
840                     }
841                     p++;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
842                 }
843                 break;
844             }
845             default:
846                 break;
847         }
848     }
849     return captureIndex;
850 }
851 
852 // NOLINTNEXTLINE(readability-function-size)
ParseAtomEscape(bool isBackward)853 int RegExpParser::ParseAtomEscape(bool isBackward)
854 {
855     // AtomEscape[U, N]::
856     //     DecimalEscape
857     //     CharacterClassEscape[?U]
858     //     CharacterEscape[?U]
859     //     [+N]kGroupName[?U]
860     int result = -1;
861     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
862     PrintF("Parse AtomEscape------\n");
863     switch (c0_) {
864         case KEY_EOF: {
865             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
866             ParseError("unexpected end");
867             break;
868         }
869         // DecimalEscape
870         case '1':
871         case '2':
872         case '3':
873         case '4':
874         case '5':
875         case '6':
876         case '7':
877         case '8':
878         case '9': {
879             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
880             PrintF("NonZeroDigit %c\n", c0_);
881             int capture = ParseDecimalDigits();
882             if (capture > captureCount_ - 1 && capture > ParseCaptureCount(nullptr) - 1) {
883                 ParseError("invalid backreference count");
884                 break;
885             }
886             if (isBackward) {
887                 BackwardBackReferenceOpCode backReferenceOp;
888                 backReferenceOp.EmitOpCode(&buffer_, capture);
889             } else {
890                 BackReferenceOpCode backReferenceOp;
891                 backReferenceOp.EmitOpCode(&buffer_, capture);
892             }
893             break;
894         }
895         // CharacterClassEscape
896         case 'd': {
897             // [0-9]
898             RangeOpCode rangeOp;
899             rangeOp.InsertOpCode(&buffer_, g_rangeD);
900             Advance();
901             break;
902         }
903         case 'D': {
904             // [^0-9]
905             RangeSet atomRange(g_rangeD);
906             atomRange.Invert(IsUtf16());
907             Range32OpCode rangeOp;
908             rangeOp.InsertOpCode(&buffer_, atomRange);
909             Advance();
910             break;
911         }
912         case 's': {
913             // [\f\n\r\t\v]
914             RangeOpCode rangeOp;
915             rangeOp.InsertOpCode(&buffer_, g_rangeS);
916             Advance();
917             break;
918         }
919         case 'S': {
920             RangeSet atomRange(g_rangeS);
921             atomRange.Invert(IsUtf16());
922             Range32OpCode rangeOp;
923             rangeOp.InsertOpCode(&buffer_, atomRange);
924             Advance();
925             break;
926         }
927         case 'w': {
928             // [A-Za-z0-9]
929             RangeOpCode rangeOp;
930             rangeOp.InsertOpCode(&buffer_, g_rangeW);
931             Advance();
932             break;
933         }
934         case 'W': {
935             // [^A-Za-z0-9]
936             RangeSet atomRange(g_rangeW);
937             atomRange.Invert(IsUtf16());
938             Range32OpCode rangeOp;
939             rangeOp.InsertOpCode(&buffer_, atomRange);
940             Advance();
941             break;
942         }
943         // P{UnicodePropertyValueExpression}
944         // p{UnicodePropertyValueExpression}
945         case 'P':
946         case 'p':
947         // [+N]kGroupName[?U]
948         case 'k':
949         default: {
950             result = ParseCharacterEscape();
951             break;
952         }
953     }
954     return result;
955 }
956 
ParseCharacterEscape()957 int RegExpParser::ParseCharacterEscape()
958 {
959     // CharacterEscape[U]::
960     //     ControlEscape
961     //     c ControlLetter
962     //     0 [lookahead ∉ DecimalDigit]
963     //     HexEscapeSequence
964     //     RegExpUnicodeEscapeSequence[?U]
965     //     IdentityEscape[?U]
966     uint32_t result = 0;
967     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
968     switch (c0_) {
969         // ControlEscape
970         case 'f': {
971             result = '\f';
972             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
973             PrintF("ControlEscape %c\n", c0_);
974             Advance();
975             break;
976         }
977         case 'n': {
978             result = '\n';
979             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
980             PrintF("ControlEscape %c\n", c0_);
981             Advance();
982             break;
983         }
984         case 'r': {
985             result = '\r';
986             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
987             PrintF("ControlEscape %c\n", c0_);
988             Advance();
989             break;
990         }
991         case 't': {
992             result = '\t';
993             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
994             PrintF("ControlEscape %c\n", c0_);
995             Advance();
996             break;
997         }
998         case 'v': {
999             result = '\v';
1000             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1001             PrintF("ControlEscape %c\n", c0_);
1002             Advance();
1003             break;
1004         }
1005         // c ControlLetter
1006         case 'c': {
1007             Advance();
1008             if ((c0_ >= 'A' && c0_ <= 'Z') || (c0_ >= 'a' && c0_ <= 'z')) {
1009                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1010                 PrintF("ControlLetter %c\n", c0_);
1011                 result = static_cast<uint32_t>(c0_) & 0x1f;  // NOLINTNEXTLINE(readability-magic-numbers)
1012                 Advance();
1013             } else {
1014                 if (!IsUtf16()) {
1015                     pc_--;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1016                     result = '\\';
1017                 } else {
1018                     ParseError("Invalid control letter");
1019                     return -1;
1020                 }
1021             }
1022             break;
1023         }
1024         case '0': {
1025             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1026             PrintF("CharacterEscape 0 [lookahead ∉ DecimalDigit]\n");
1027             if (IsUtf16() && !(*pc_ >= '0' && *pc_ <= '9')) {  // NOLINTNEXTLINE(readability-magic-numbers)
1028                 Advance();
1029                 result = 0;
1030                 break;
1031             }
1032             [[fallthrough]];
1033         }
1034         case '1':
1035         case '2':
1036         case '3':
1037         case '4':
1038         case '5':
1039         case '6':
1040         case '7': {
1041             if (IsUtf16()) {
1042                 // With /u, decimal escape is not interpreted as octal character code.
1043                 ParseError("Invalid class escape");
1044                 return 0;
1045             }
1046             result = ParseOctalLiteral();
1047             break;
1048         }
1049         // ParseHexEscapeSequence
1050         // ParseRegExpUnicodeEscapeSequence
1051         case 'x': {
1052             Advance();
1053             if (ParseHexEscape(UNICODE_HEX_ADVANCE, &result)) {
1054                 return result;
1055             }
1056             if (IsUtf16()) {
1057                 ParseError("Invalid class escape");
1058                 return -1;
1059             }
1060             result = 'x';
1061             break;
1062         }
1063         case 'u': {
1064             Advance();
1065             if (ParseUnicodeEscape(&result)) {
1066                 return result;
1067             }
1068             if (IsUtf16()) {
1069                 // With /u, invalid escapes are not treated as identity escapes.
1070                 ParseError("Invalid unicode escape");
1071                 return 0;
1072             }
1073             // If \u is not followed by a two-digit hexadecimal, treat it
1074             // as an identity escape.
1075             result = 'u';
1076             break;
1077         }
1078         // IdentityEscape[?U]
1079         case '$':
1080         case '(':
1081         case ')':
1082         case '*':
1083         case '+':
1084         case '.':
1085         case '/':
1086         case '?':
1087         case '[':
1088         case '\\':
1089         case ']':
1090         case '^':
1091         case '{':
1092         case '|':
1093         case '}': {
1094             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1095             PrintF("IdentityEscape %c\n", c0_);
1096             result = c0_;
1097             Advance();
1098             break;
1099         }
1100         default: {
1101             if (IsUtf16()) {
1102                 ParseError("Invalid unicode escape");
1103                 return 0;
1104             }
1105             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1106             PrintF("SourceCharacter %c\n", c0_);
1107             result = c0_;
1108             Advance();
1109             break;
1110         }
1111     }
1112     return result;
1113 }
1114 
ParseClassRanges(RangeSet * result)1115 bool RegExpParser::ParseClassRanges(RangeSet *result)
1116 {
1117     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1118     PrintF("Parse ClassRanges------\n");
1119     while (c0_ != ']') {
1120         RangeSet s1;
1121         uint32_t c1 = ParseClassAtom(&s1);
1122         if (c1 == UINT32_MAX) {
1123             ParseError("invalid class range");
1124             return false;
1125         }
1126 
1127         int next_c0 = *pc_;
1128         if (c0_ == '-' && next_c0 != ']') {
1129             if (c1 == CLASS_RANGE_BASE) {
1130                 if (IsUtf16()) {
1131                     ParseError("invalid class range");
1132                     return false;
1133                 }
1134                 result->Insert(s1);
1135                 continue;
1136             }
1137             Advance();
1138             RangeSet s2;
1139             uint32_t c2 = ParseClassAtom(&s2);
1140             if (c2 == UINT32_MAX) {
1141                 ParseError("invalid class range");
1142                 return false;
1143             }
1144             if (c2 == CLASS_RANGE_BASE) {
1145                 if (IsUtf16()) {
1146                     ParseError("invalid class range");
1147                     return false;
1148                 }
1149                 result->Insert(s2);
1150                 continue;
1151             }
1152 
1153             if (c1 > c2) {
1154                 ParseError("invalid class range");
1155                 return false;
1156             }
1157             if (IsIgnoreCase()) {
1158                 c1 = Canonicalize(c1, IsUtf16());
1159                 c2 = Canonicalize(c2, IsUtf16());
1160             }
1161 
1162             result->Insert(c1, c2);
1163         } else {
1164             result->Insert(s1);
1165         }
1166     }
1167     Advance();
1168     return true;
1169 }
1170 
ParseClassAtom(RangeSet * atom)1171 uint32_t RegExpParser::ParseClassAtom(RangeSet *atom)
1172 {
1173     uint32_t ret = UINT32_MAX;
1174     switch (c0_) {
1175         case '\\': {
1176             Advance();
1177             ret = ParseClassEscape(atom);
1178             break;
1179         }
1180         case KEY_EOF:
1181             break;
1182         case 0: {
1183             if (pc_ >= end_) {
1184                 return UINT32_MAX;
1185             }
1186             [[fallthrough]];
1187         }
1188         default: {
1189             uint32_t value = c0_;
1190             int u16_size = 0;
1191             if (c0_ > INT8_MAX) {  // NOLINTNEXTLINE(readability-magic-numbers)
1192                 pc_ -= 1;          // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1193                 auto u16_result = base::utf_helper::ConvertUtf8ToUtf16Pair(pc_, true);
1194                 value = u16_result.first;
1195                 u16_size = u16_result.second;
1196                 Advance(u16_size + 1);
1197             } else {
1198                 Advance();
1199             }
1200             if (IsIgnoreCase()) {
1201                 value = Canonicalize(value, IsUtf16());
1202             }
1203             atom->Insert(RangeSet(value));
1204             ret = value;
1205             break;
1206         }
1207     }
1208     return ret;
1209 }
1210 
ParseClassEscape(RangeSet * atom)1211 int RegExpParser::ParseClassEscape(RangeSet *atom)
1212 {
1213     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1214     PrintF("Parse ClassEscape------\n");
1215     int result = -1;
1216     switch (c0_) {
1217         case 'b': {
1218             Advance();
1219             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1220             PrintF("ClassEscape %c", 'b');
1221             result = '\b';
1222             atom->Insert(RangeSet(static_cast<uint32_t>('\b')));
1223             break;
1224         }
1225         case '-': {
1226             Advance();
1227             result = '-';
1228             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1229             PrintF("ClassEscape %c", '-');
1230             atom->Insert(RangeSet(static_cast<uint32_t>('-')));
1231             break;
1232         }
1233         // CharacterClassEscape
1234         case 'd':
1235         case 'D': {
1236             result = CLASS_RANGE_BASE;
1237             atom->Insert(g_rangeD);
1238             if (c0_ == 'D') {
1239                 atom->Invert(IsUtf16());
1240             }
1241             Advance();
1242             break;
1243         }
1244         case 's':
1245         case 'S': {
1246             result = CLASS_RANGE_BASE;
1247             atom->Insert(g_rangeS);
1248             if (c0_ == 'S') {
1249                 atom->Invert(IsUtf16());
1250             }
1251             Advance();
1252             break;
1253         }
1254         case 'w':
1255         case 'W': {
1256             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1257             PrintF("ClassEscape::CharacterClassEscape %c\n", c0_);
1258             result = CLASS_RANGE_BASE;
1259             atom->Insert(g_rangeW);
1260             if (c0_ == 'W') {
1261                 atom->Invert(IsUtf16());
1262             }
1263             Advance();
1264             break;
1265         }
1266         // P{UnicodePropertyValueExpression}
1267         // p{UnicodePropertyValueExpression}
1268         case 'P':
1269         case 'p': {
1270             Advance();
1271             if (c0_ == '{') {
1272                 Advance();
1273                 bool isValue = false;
1274                 ParseUnicodePropertyValueCharacters(&isValue);
1275                 if (!isValue) {
1276                     ParseUnicodePropertyValueCharacters(&isValue);
1277                 }
1278             }
1279             break;
1280         }
1281         default: {
1282             result = ParseCharacterEscape();
1283             int value = result;
1284             if (IsIgnoreCase()) {
1285                 value = Canonicalize(value, IsUtf16());
1286             }
1287             atom->Insert(RangeSet(static_cast<uint32_t>(value)));
1288             break;
1289         }
1290     }
1291     return result;
1292 }
1293 
ParseUnicodePropertyValueCharacters(bool * isValue)1294 void RegExpParser::ParseUnicodePropertyValueCharacters(bool *isValue)
1295 {
1296     if ((c0_ >= 'A' && c0_ <= 'Z') || (c0_ >= 'a' && c0_ <= 'z')) {
1297         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1298         PrintF("UnicodePropertyCharacter::ControlLetter %c\n", c0_);
1299         Advance();
1300     } else if (c0_ == '-') {
1301         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1302         PrintF("UnicodePropertyCharacter:: - \n");
1303         Advance();
1304     } else if (c0_ >= '0' && c0_ <= '9') {
1305         *isValue = true;
1306         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1307         PrintF("UnicodePropertyValueCharacter::DecimalDigit %c\n", c0_);
1308         Advance();
1309     } else if (*isValue && c0_ == '}') {
1310         Advance();
1311         return;
1312     } else if (!*isValue && c0_ == '=') {
1313         Advance();
1314         return;
1315     }
1316     ParseUnicodePropertyValueCharacters(isValue);
1317 }
1318 
1319 // NOLINTNEXTLINE(cert-dcl50-cpp)
PrintF(const char * fmt,...)1320 void RegExpParser::PrintF(const char *fmt, ...)
1321 {
1322 #ifndef _NO_DEBUG_
1323     va_list args;
1324     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,)
1325     va_start(args, fmt);
1326     vprintf(fmt, args);
1327     va_end(args);
1328 #else
1329     (void)fmt;
1330 #endif
1331 }
1332 
ParseError(const char * errorMessage)1333 void RegExpParser::ParseError(const char *errorMessage)
1334 {
1335     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1336     PrintF("error: ");
1337     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1338     PrintF(errorMessage);
1339     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1340     PrintF("\n");
1341     SetIsError();
1342     size_t length = strlen(errorMessage) + 1;
1343     if (memcpy_s(errorMsg_, length, errorMessage, length) != EOK) {
1344         LOG_ECMA(FATAL) << "memcpy_s failed";
1345         UNREACHABLE();
1346     }
1347 }
1348 }  // namespace panda::ecmascript
1349