• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/regexp/regexp_parser.h"
17 
18 #include "ecmascript/base/string_helper.h"
19 #include "libpandabase/utils/utils.h"
20 #define _NO_DEBUG_
21 
22 namespace panda::ecmascript {
23 static constexpr uint32_t CACHE_SIZE = 128;
24 static constexpr uint32_t CHAR_MAXS = 128;
25 static constexpr uint32_t ID_START_TABLE_ASCII[4] = {
26     /* $ A-Z _ a-z */
27     0x00000000, 0x00000010, 0x87FFFFFE, 0x07FFFFFE
28 };
29 static RangeSet g_rangeD(0x30, 0x39);  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
30 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
31 static RangeSet g_rangeS({
32     std::pair<uint32_t, uint32_t>(0x0009, 0x000D),  // NOLINTNEXTLINE(readability-magic-numbers)
33     std::pair<uint32_t, uint32_t>(0x0020, 0x0020),  // NOLINTNEXTLINE(readability-magic-numbers)
34     std::pair<uint32_t, uint32_t>(0x00A0, 0x00A0),  // NOLINTNEXTLINE(readability-magic-numbers)
35     std::pair<uint32_t, uint32_t>(0x1680, 0x1680),  // NOLINTNEXTLINE(readability-magic-numbers)
36     std::pair<uint32_t, uint32_t>(0x2000, 0x200A),  // NOLINTNEXTLINE(readability-magic-numbers)
37     /* 2028;LINE SEPARATOR;Zl;0;WS;;;;;N;;;;; */
38     /* 2029;PARAGRAPH SEPARATOR;Zp;0;B;;;;;N;;;;; */
39     std::pair<uint32_t, uint32_t>(0x2028, 0x2029),  // NOLINTNEXTLINE(readability-magic-numbers)
40     std::pair<uint32_t, uint32_t>(0x202F, 0x202F),  // NOLINTNEXTLINE(readability-magic-numbers)
41     std::pair<uint32_t, uint32_t>(0x205F, 0x205F),  // NOLINTNEXTLINE(readability-magic-numbers)
42     std::pair<uint32_t, uint32_t>(0x3000, 0x3000),  // NOLINTNEXTLINE(readability-magic-numbers)
43     /* FEFF;ZERO WIDTH NO-BREAK SPACE;Cf;0;BN;;;;;N;BYTE ORDER MARK;;;; */
44     std::pair<uint32_t, uint32_t>(0xFEFF, 0xFEFF),  // NOLINTNEXTLINE(readability-magic-numbers)
45 });
46 
47 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
48 static RangeSet g_rangeW({
49     std::pair<uint32_t, uint32_t>(0x0030, 0x0039),  // NOLINTNEXTLINE(readability-magic-numbers)
50     std::pair<uint32_t, uint32_t>(0x0041, 0x005A),  // NOLINTNEXTLINE(readability-magic-numbers)
51     std::pair<uint32_t, uint32_t>(0x005F, 0x005F),  // NOLINTNEXTLINE(readability-magic-numbers)
52     std::pair<uint32_t, uint32_t>(0x0061, 0x007A),  // NOLINTNEXTLINE(readability-magic-numbers)
53 });
54 
55 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
56 static RangeSet g_regexpIdentifyStart({
57     std::pair<uint32_t, uint32_t>(0x0024, 0x0024),  // NOLINTNEXTLINE(readability-magic-numbers)
58     std::pair<uint32_t, uint32_t>(0x0041, 0x005A),  // NOLINTNEXTLINE(readability-magic-numbers)
59     std::pair<uint32_t, uint32_t>(0x0061, 0x007A),  // NOLINTNEXTLINE(readability-magic-numbers)
60 });
61 
62 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
63 static RangeSet g_regexpIdentifyContinue({
64     std::pair<uint32_t, uint32_t>(0x0024, 0x0024),  // NOLINTNEXTLINE(readability-magic-numbers)
65     std::pair<uint32_t, uint32_t>(0x0030, 0x0039),  // NOLINTNEXTLINE(readability-magic-numbers)
66     std::pair<uint32_t, uint32_t>(0x0041, 0x005A),  // NOLINTNEXTLINE(readability-magic-numbers)
67     std::pair<uint32_t, uint32_t>(0x0061, 0x007A),  // NOLINTNEXTLINE(readability-magic-numbers)
68 });
69 
Parse()70 void RegExpParser::Parse()
71 {
72     // dynbuffer head init [size,capture_count,statck_count,flags,prefilter]
73     buffer_.EmitU32(0);
74     buffer_.EmitU32(0);
75     buffer_.EmitU32(0);
76     buffer_.EmitU32(0);
77     buffer_.EmitU32(0);
78     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
79     PrintF("Parse Pattern------\n");
80     // Pattern[U, N]::
81     //      Disjunction[?U, ?N]
82     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
83     Advance();
84     SaveStartOpCode saveStartOp;
85     int captureIndex = captureCount_++;
86     saveStartOp.EmitOpCode(&buffer_, captureIndex);
87     ParseDisjunction(false);
88     if (isError_) {
89         return;
90     }
91     if (c0_ != KEY_EOF) {
92         ParseError("extraneous characters at the end");
93         return;
94     }
95     SaveEndOpCode saveEndOp;
96     saveEndOp.EmitOpCode(&buffer_, captureIndex);
97     MatchEndOpCode matchEndOp;
98     matchEndOp.EmitOpCode(&buffer_, 0);
99 
100     uint32_t ptr = RegExpParser::OP_START_OFFSET;
101     ptr += static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_SAVE_START)->GetSize());
102     uint8_t opCode = buffer_.GetU8(ptr);
103     uint16_t expectedChar = 0;
104     if (opCode == RegExpOpCode::OP_CHAR && !IsIgnoreCase()) {
105         expectedChar = buffer_.GetU16(ptr + 1);
106         if (expectedChar > UINT8_MAX) {
107             expectedChar = 0;
108         }
109     }
110 
111     // dynbuffer head assignments
112     buffer_.PutU32(0, buffer_.size_);
113     buffer_.PutU32(NUM_CAPTURE__OFFSET, captureCount_);
114     buffer_.PutU32(NUM_STACK_OFFSET, stackCount_);
115     buffer_.PutU32(FLAGS_OFFSET, flags_);
116     buffer_.PutU32(PREFILTER_OFFSET, expectedChar);
117 #ifndef _NO_DEBUG_
118     RegExpOpCode::DumpRegExpOpCode(std::cout, buffer_, buffer_.GetSize());
119 #endif
120 }
121 
ParseDisjunction(bool isBackward)122 void RegExpParser::ParseDisjunction(bool isBackward)
123 {
124     // check stack overflow because infinite recursion may occur
125     DoParserStackOverflowCheck("invalid regular expression.");
126     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
127     PrintF("Parse Disjunction------\n");
128     if (c0_ == ')') {
129         isEmpty_ = true;
130         return;
131     }
132     size_t start = buffer_.size_;
133     ParseAlternative(isBackward);
134     if (isError_) {
135         return;
136     }
137     uint32_t para = RegExpOpCode::INVALID_PARA;
138     do {
139         if (c0_ == '|') {
140             SplitNextOpCode splitOp;
141             uint32_t len = buffer_.size_ - start;
142             GotoOpCode gotoOp;
143             splitOp.InsertOpCode(&buffer_, start, len + gotoOp.GetSize());
144             uint32_t pos = gotoOp.EmitOpCode(&buffer_, 0) - gotoOp.GetSize();
145             gotoOp.UpdateOpPara(&buffer_, pos, para);
146             Advance();
147             ParseAlternative(isBackward);
148             para = buffer_.size_ - pos - gotoOp.GetSize();
149             if (c0_ != '|') {
150                 uint16_t cnt = 0;
151                 uint32_t opCharSize =
152                     static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_CHAR)->GetSize());
153                 uint32_t opSplitSize =
154                     static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_SPLIT_NEXT)->GetSize());
155                 std::vector<uint16_t> chars;
156                 std::vector<uint32_t> offsets;
157                 std::set<uint16_t> checkSet;
158                 uint32_t ptr = start;
159                 bool isSparseable = true;
160                 do {
161                     uint8_t opCode = buffer_.GetU8(ptr);
162                     uint32_t offset = 0;
163                     uint32_t branch = ptr;
164                     bool isLastBranch = false;
165                     if (opCode == RegExpOpCode::OP_SPLIT_NEXT) {
166                         offset = buffer_.GetU32(ptr + 1);
167                         branch = ptr + offset + opSplitSize;
168                     } else {
169                         isLastBranch = true;
170                     }
171                     uint8_t opCodeChar = buffer_.GetU8(branch);
172                     if (opCodeChar == RegExpOpCode::OP_CHAR) {
173                         chars.push_back(buffer_.GetU16(branch + 1));
174                         offsets.push_back(offset);
175                         if (checkSet.find(chars[cnt]) != checkSet.end()) {
176                             isSparseable = false;
177                             break;
178                         }
179                         checkSet.insert(chars[cnt]);
180                     } else {
181                         isSparseable = false;
182                         break;
183                     }
184                     cnt++;
185                     if (isLastBranch) {
186                         break;
187                     }
188                     ptr += opSplitSize;
189                 } while (true);
190 
191                 if (isSparseable) {
192                     uint32_t sparseLen = SPARSE_HEAD_OFFSET + static_cast<uint32_t>(cnt) * SPARSE_MAX_OFFSET;
193                     uint32_t splitsLen = static_cast<uint32_t>(cnt - 1) * opSplitSize;
194                     ptr = start;
195                     buffer_.Insert(start, sparseLen - splitsLen);
196                     pos += sparseLen - splitsLen;
197                     buffer_.PutU8(ptr, RegExpOpCode::OP_SPARSE);
198                     buffer_.PutU16(ptr + 1, cnt);
199                     ptr += SPARSE_HEAD_OFFSET;
200                     ASSERT(chars.size() > 0);
201                     for (int32_t i = static_cast<int32_t>(chars.size() - 1); i >= 0; i--) {
202                         buffer_.PutU16(ptr, chars[i]);
203                         // 2: cnt = count of splits + 1, for invert index should be extra - 1, so -1-1=-2
204                         offsets[i] += opCharSize - opSplitSize * std::max(0, cnt - i -2);
205                         buffer_.PutU32(ptr + SPARSE_OFF_OFFSET, offsets[i]);
206                         ptr += SPARSE_MAX_OFFSET;
207                     }
208                 }
209                 bool isEnd = false;
210                 do {
211                     uint32_t paraTmp = buffer_.GetU32(pos + 1);
212                     if (paraTmp == RegExpOpCode::INVALID_PARA) {
213                         isEnd = true;
214                     }
215                     buffer_.PutU32(pos + 1, para);
216                     para += paraTmp + gotoOp.GetSize();
217                     pos -= paraTmp + gotoOp.GetSize();
218                 } while (!isEnd);
219             }
220             if (isError_) {
221                 return;
222             }
223         }
224     } while (c0_ != KEY_EOF && c0_ != ')');
225 }
226 
ParseOctalLiteral()227 uint32_t RegExpParser::ParseOctalLiteral()
228 {
229     // For compatibility with some other browsers (not all), we parse
230     // up to three octal digits with a value below 256.
231     // ES#prod-annexB-LegacyOctalEscapeSequence
232     uint32_t value = c0_ - '0';
233     Advance();
234     if (c0_ >= '0' && c0_ <= '7') {
235         value = value * OCTAL_VALUE + c0_ - '0';
236         Advance();
237         if (value < OCTAL_VALUE_RANGE && c0_ >= '0' && c0_ <= '7') {
238             value = value * OCTAL_VALUE + c0_ - '0';
239             Advance();
240         }
241     }
242     return value;
243 }
244 
ParseUnlimitedLengthHexNumber(uint32_t maxValue,uint32_t * value)245 bool RegExpParser::ParseUnlimitedLengthHexNumber(uint32_t maxValue, uint32_t *value)
246 {
247     uint32_t x = 0;
248     int d = static_cast<int>(HexValue(c0_));
249     if (d < 0) {
250         return false;
251     }
252     while (d >= 0) {
253         if (UNLIKELY(x > (std::numeric_limits<uint32_t>::max() - static_cast<uint32_t>(d)) / HEX_VALUE)) {
254             LOG_FULL(FATAL) << "value overflow";
255             return false;
256         }
257         x = x * HEX_VALUE + static_cast<uint32_t>(d);
258         if (x > maxValue) {
259             return false;
260         }
261         Advance();
262         d = static_cast<int>(HexValue(c0_));
263     }
264     *value = x;
265     return true;
266 }
267 
268 // This parses RegExpUnicodeEscapeSequence as described in ECMA262.
ParseUnicodeEscape(uint32_t * value)269 bool RegExpParser::ParseUnicodeEscape(uint32_t *value)
270 {
271     // Accept both \uxxxx and \u{xxxxxx} (if allowed).
272     // In the latter case, the number of hex digits between { } is arbitrary.
273     // \ and u have already been read.
274     if (c0_ == '{' && IsUtf16()) {
275         uint8_t *start = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
276         Advance();
277         if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) {  // NOLINTNEXTLINE(readability-magic-numbers)
278             if (c0_ == '}') {
279                 Advance();
280                 return true;
281             }
282         }
283         pc_ = start;
284         Advance();
285         return false;
286     }
287     // \u but no {, or \u{...} escapes not allowed.
288     bool result = ParseHexEscape(UNICODE_HEX_VALUE, value);
289     if (result && IsUtf16() && U16_IS_LEAD(*value) && c0_ == '\\') {
290         // Attempt to read trail surrogate.
291         uint8_t *start = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
292         if (*pc_ == 'u') {
293             Advance(UNICODE_HEX_ADVANCE);
294             uint32_t trail = 0;
295             if (ParseHexEscape(UNICODE_HEX_VALUE, &trail) && U16_IS_TRAIL(trail)) {
296                 *value = U16_GET_SUPPLEMENTARY((*value), (trail));  // NOLINTNEXTLINE(hicpp-signed-bitwise)
297                 return true;
298             }
299         }
300         pc_ = start;
301         Advance();
302     }
303     return result;
304 }
305 
ParseHexEscape(int length,uint32_t * value)306 bool RegExpParser::ParseHexEscape(int length, uint32_t *value)
307 {
308     uint8_t *start = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
309     uint32_t val = 0;
310     for (int i = 0; i < length; ++i) {
311         uint32_t c = c0_;
312         int d = static_cast<int>(HexValue(c));
313         if (d < 0) {
314             pc_ = start;
315             Advance();
316             return false;
317         }
318         val = val * HEX_VALUE + static_cast<uint32_t>(d);
319         Advance();
320     }
321     *value = val;
322     return true;
323 }
324 
325 // NOLINTNEXTLINE(readability-function-size)
ParseAlternative(bool isBackward)326 void RegExpParser::ParseAlternative(bool isBackward)
327 {
328     size_t start = buffer_.size_;
329     while (c0_ != '|' && c0_ != KEY_EOF && c0_ != ')') {
330         if (isError_) {
331             return;
332         }
333         size_t atomBcStart = buffer_.GetSize();
334         int captureIndex = 0;
335         bool isAtom = false;
336         switch (c0_) {
337             case '^': {
338                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
339                 PrintF("Assertion %c line start \n", c0_);
340                 LineStartOpCode lineStartOp;
341                 lineStartOp.EmitOpCode(&buffer_, 0);
342                 Advance();
343                 break;
344             }
345             case '$': {
346                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
347                 PrintF("Assertion %c line end \n", c0_);
348                 LineEndOpCode lineEndOp;
349                 lineEndOp.EmitOpCode(&buffer_, 0);
350                 Advance();
351                 break;
352             }
353             case '\\': {
354                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
355                 PrintF("Escape %c \n", c0_);
356                 Advance();
357                 switch (c0_) {
358                     case 'b': {
359                         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
360                         PrintF("Assertion %c \n", c0_);
361                         WordBoundaryOpCode wordBoundaryOp;
362                         wordBoundaryOp.EmitOpCode(&buffer_, 0);
363                         Advance();
364                         break;
365                     }
366                     case 'B': {
367                         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
368                         PrintF("Assertion %c \n", c0_);
369                         NotWordBoundaryOpCode notWordBoundaryOp;
370                         notWordBoundaryOp.EmitOpCode(&buffer_, 0);
371                         Advance();
372                         break;
373                     }
374                     default: {
375                         isAtom = true;
376                         int atomValue = ParseAtomEscape(isBackward);
377                         if (atomValue != -1) {
378                             PrevOpCode prevOp;
379                             if (isBackward) {
380                                 prevOp.EmitOpCode(&buffer_, 0);
381                             }
382                             if (IsIgnoreCase()) {
383                                 if (!IsUtf16()) {
384                                     atomValue = Canonicalize(atomValue, false);
385                                 } else {
386                                     icu::UnicodeSet set(atomValue, atomValue);
387                                     set.closeOver(USET_CASE_INSENSITIVE);
388                                     set.removeAllStrings();
389                                     uint32_t size = static_cast<uint32_t>(set.size());
390                                     RangeOpCode rangeOp;
391                                     RangeSet rangeResult;
392                                     for (uint32_t idx = 0; idx < size; idx++) {
393                                         int32_t uc = set.charAt(idx);
394                                         RangeSet curRange(uc);
395                                         rangeResult.Insert(curRange);
396                                     }
397                                     rangeOp.InsertOpCode(&buffer_, rangeResult);
398                                     break;
399                                 }
400                             }
401                             if (atomValue <= UINT16_MAX) {
402                                 CharOpCode charOp;
403                                 charOp.EmitOpCode(&buffer_, atomValue);
404                             } else {
405                                 Char32OpCode charOp;
406                                 charOp.EmitOpCode(&buffer_, atomValue);
407                             }
408                             if (isBackward) {
409                                 prevOp.EmitOpCode(&buffer_, 0);
410                             }
411                         }
412                         break;
413                     }
414                 }
415                 break;
416             }
417             case '(': {
418                 Advance();
419                 isAtom = ParseAssertionCapture(&captureIndex, isBackward);
420                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
421                 Advance();
422                 break;
423             }
424             case '.': {
425                 PrevOpCode prevOp;
426                 if (isBackward) {
427                     prevOp.EmitOpCode(&buffer_, 0);
428                 }
429                 if (IsDotAll()) {
430                     AllOpCode allOp;
431                     allOp.EmitOpCode(&buffer_, 0);
432                 } else {
433                     DotsOpCode dotsOp;
434                     dotsOp.EmitOpCode(&buffer_, 0);
435                 }
436                 if (isBackward) {
437                     prevOp.EmitOpCode(&buffer_, 0);
438                 }
439                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
440                 PrintF("Atom %c match any \n", c0_);
441                 isAtom = true;
442                 Advance();
443                 break;
444             }
445             case '[': {
446                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
447                 PrintF("Atom %c match range \n", c0_);
448                 isAtom = true;
449                 PrevOpCode prevOp;
450                 Advance();
451                 if (isBackward) {
452                     prevOp.EmitOpCode(&buffer_, 0);
453                 }
454                 bool isInvert = false;
455                 if (c0_ == '^') {
456                     isInvert = true;
457                     Advance();
458                 }
459                 RangeSet rangeResult;
460                 if (!ParseClassRanges(&rangeResult)) {
461                     break;
462                 }
463                 if (isInvert) {
464                     rangeResult.Invert(IsUtf16());
465                 }
466                 uint32_t highValue = rangeResult.HighestValue();
467                 if (highValue <= UINT16_MAX) {
468                     RangeOpCode rangeOp;
469                     rangeOp.InsertOpCode(&buffer_, rangeResult);
470                 } else {
471                     Range32OpCode rangeOp;
472                     rangeOp.InsertOpCode(&buffer_, rangeResult);
473                 }
474 
475                 if (isBackward) {
476                     prevOp.EmitOpCode(&buffer_, 0);
477                 }
478                 break;
479             }
480             case '*':
481             case '+':
482             case '?':
483                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
484                 ParseError("nothing to repeat");
485                 return;
486             case '{': {
487                 uint8_t *begin = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
488                 int dummy;
489                 if (ParserIntervalQuantifier(&dummy, &dummy)) {
490                     ParseError("nothing to repeat");
491                     return;
492                 }
493                 pc_ = begin;
494                 Advance();
495             }
496                 [[fallthrough]];
497             case '}':
498             case ']':
499                 if (IsUtf16()) {
500                     ParseError("syntax error");
501                     return;
502                 }
503                 [[fallthrough]];
504             default: {
505                 // PatternCharacter
506                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
507                 PrintF("PatternCharacter %c\n", c0_);
508                 isAtom = true;
509                 {
510                     PrevOpCode prevOp;
511                     if (isBackward) {
512                         prevOp.EmitOpCode(&buffer_, 0);
513                     }
514                     uint32_t matchedChar = c0_;
515                     if (c0_ > (INT8_MAX + 1)) {
516                         Prev();
517                         UChar32 c;
518                         int32_t length = end_ - pc_ + 1;
519                         // NOLINTNEXTLINE(hicpp-signed-bitwise)
520                         auto unicodeChar = base::utf_helper::ConvertUtf8ToUnicodeChar(pc_, length);
521                         c = unicodeChar.first;
522                         matchedChar = static_cast<uint32_t>(c);
523                         pc_ += unicodeChar.second;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
524                     }
525                     if (IsIgnoreCase()) {
526                         matchedChar = static_cast<uint32_t>(Canonicalize(static_cast<int>(matchedChar), IsUtf16()));
527                     }
528                     if (matchedChar > UINT16_MAX) {
529                         Char32OpCode charOp;
530                         charOp.EmitOpCode(&buffer_, matchedChar);
531                     } else {
532                         CharOpCode charOp;
533                         charOp.EmitOpCode(&buffer_, matchedChar);
534                     }
535                     if (isBackward) {
536                         prevOp.EmitOpCode(&buffer_, 0);
537                     }
538                 }
539                 Advance();
540                 break;
541             }
542         }
543         if (isAtom && !isError_) {
544             ParseQuantifier(atomBcStart, captureIndex, captureCount_ - 1);
545         }
546         if (isBackward) {
547             size_t end = buffer_.GetSize();
548             size_t termSize = end - atomBcStart;
549             size_t moveSize = end - start;
550             buffer_.Expand(end + termSize);
551             if (memmove_s(buffer_.buf_ + start +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
552                               termSize,           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
553                           moveSize,
554                           buffer_.buf_ + start,  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
555                           moveSize) != EOK) {
556                 LOG_FULL(FATAL) << "memmove_s failed";
557                 UNREACHABLE();
558             }
559             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
560             if (memcpy_s(buffer_.buf_ + start, termSize, buffer_.buf_ + end, termSize) != EOK) {
561                 LOG_FULL(FATAL) << "memcpy_s failed";
562                 UNREACHABLE();
563             }
564         }
565     }
566 }
567 
FindGroupName(const CString & name)568 int RegExpParser::FindGroupName(const CString &name)
569 {
570     size_t len = 0;
571     size_t nameLen = name.size();
572     const char *p = reinterpret_cast<char *>(groupNames_.buf_);
573     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
574     const char *bufEnd = reinterpret_cast<char *>(groupNames_.buf_) + groupNames_.size_;
575     int captureIndex = 1;
576     while (p < bufEnd) {
577         len = strlen(p);
578         if (len == nameLen && memcmp(name.c_str(), p, nameLen) == 0) {
579             return captureIndex;
580         }
581         p += len + 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
582         captureIndex++;
583     }
584     return -1;
585 }
586 
ParseAssertionCapture(int * captureIndex,bool isBackward)587 bool RegExpParser::ParseAssertionCapture(int *captureIndex, bool isBackward)
588 {
589     bool isAtom = false;
590     do {
591         if (c0_ == '?') {
592             Advance();
593             switch (c0_) {
594                 // (?=Disjunction[?U, ?N])
595                 case '=': {
596                     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
597                     PrintF("Assertion(?= Disjunction)\n");
598                     Advance();
599                     uint32_t start = buffer_.size_;
600                     ParseDisjunction(isBackward);
601                     MatchOpCode matchOp;
602                     matchOp.EmitOpCode(&buffer_, 0);
603                     MatchAheadOpCode matchAheadOp;
604                     uint32_t len = buffer_.size_ - start;
605                     matchAheadOp.InsertOpCode(&buffer_, start, len);
606                     break;
607                 }
608                 // (?!Disjunction[?U, ?N])
609                 case '!': {
610                     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
611                     PrintF("Assertion(?! Disjunction)\n");
612                     uint32_t start = buffer_.size_;
613                     Advance();
614                     ParseDisjunction(isBackward);
615                     MatchOpCode matchOp;
616                     matchOp.EmitOpCode(&buffer_, 0);
617                     NegativeMatchAheadOpCode matchAheadOp;
618                     uint32_t len = buffer_.size_ - start;
619                     matchAheadOp.InsertOpCode(&buffer_, start, len);
620                     break;
621                 }
622                 case '<': {
623                     Advance();
624                     // (?<=Disjunction[?U, ?N])
625                     if (c0_ == '=') {
626                         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
627                         PrintF("Assertion(?<= Disjunction)\n");
628                         Advance();
629                         uint32_t start = buffer_.size_;
630                         ParseDisjunction(true);
631                         MatchOpCode matchOp;
632                         matchOp.EmitOpCode(&buffer_, 0);
633                         MatchAheadOpCode matchAheadOp;
634                         uint32_t len = buffer_.size_ - start;
635                         matchAheadOp.InsertOpCode(&buffer_, start, len);
636                         // (?<!Disjunction[?U, ?N])
637                     } else if (c0_ == '!') {
638                         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
639                         PrintF("Assertion(?<! Disjunction)\n");
640                         Advance();
641                         uint32_t start = buffer_.size_;
642                         ParseDisjunction(true);
643                         MatchOpCode matchOp;
644                         matchOp.EmitOpCode(&buffer_, 0);
645                         NegativeMatchAheadOpCode matchAheadOp;
646                         uint32_t len = buffer_.size_ - start;
647                         matchAheadOp.InsertOpCode(&buffer_, start, len);
648                     } else {
649                         Prev();
650                         CString name;
651                         auto **pp = const_cast<const uint8_t **>(&pc_);
652                         if (!ParseGroupSpecifier(pp, name)) {
653                             ParseError("GroupName Syntax error.");
654                             return false;
655                         }
656                         if (FindGroupName(name) > 0) {
657                             ParseError("Duplicate GroupName error.");
658                             return false;
659                         }
660                         groupNames_.EmitStr(name.c_str());
661                         newGroupNames_.push_back(name);
662                         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
663                         PrintF("group name %s", name.c_str());
664                         Advance();
665                         goto parseCapture;  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto)
666                     }
667                     break;
668                 }
669                 // (?:Disjunction[?U, ?N])
670                 case ':':
671                     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
672                     PrintF("Atom(?<: Disjunction)\n");
673                     isAtom = true;
674                     Advance();
675                     ParseDisjunction(isBackward);
676                     break;
677                 default:
678                     Advance();
679                     ParseError("? Syntax error.");
680                     return false;
681             }
682             if (isError_) {
683                 return false;
684             }
685         } else {
686             groupNames_.EmitChar(0);
687         parseCapture:
688             isAtom = true;
689             *captureIndex = captureCount_++;
690             SaveEndOpCode saveEndOp;
691             SaveStartOpCode saveStartOp;
692             if (isBackward) {
693                 saveEndOp.EmitOpCode(&buffer_, *captureIndex);
694             } else {
695                 saveStartOp.EmitOpCode(&buffer_, *captureIndex);
696             }
697             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
698             PrintF("capture start %d \n", *captureIndex);
699             ParseDisjunction(isBackward);
700             if (isError_) {
701                 return false;
702             }
703             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
704             PrintF("capture end %d \n", *captureIndex);
705             if (isBackward) {
706                 saveStartOp.EmitOpCode(&buffer_, *captureIndex);
707             } else {
708                 saveEndOp.EmitOpCode(&buffer_, *captureIndex);
709             }
710         }
711     } while (c0_ != ')' && c0_ != KEY_EOF);
712     if (c0_ != ')') {
713         ParseError("capture syntax error");
714         return false;
715     }
716     return isAtom;
717 }
718 
ParseDecimalDigits()719 int RegExpParser::ParseDecimalDigits()
720 {
721     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
722     PrintF("Parse DecimalDigits------\n");
723     uint32_t result = 0;
724     bool overflow = false;
725     while (true) {
726         if (c0_ < '0' || c0_ > '9') {
727             break;
728         }
729         if (!overflow) {
730             if (UNLIKELY(result > (INT32_MAX - c0_ + '0') / DECIMAL_DIGITS_ADVANCE)) {
731                 overflow = true;
732             } else {
733                 result = result * DECIMAL_DIGITS_ADVANCE + c0_ - '0';
734             }
735         }
736         Advance();
737     }
738     if (overflow) {
739         return INT32_MAX;
740     }
741     return result;
742 }
743 
ParserIntervalQuantifier(int * pmin,int * pmax)744 bool RegExpParser::ParserIntervalQuantifier(int *pmin, int *pmax)
745 {
746     // Quantifier::
747     //     QuantifierPrefix
748     //     QuantifierPrefix?
749     // QuantifierPrefix::
750     // *
751     // +
752     // ?
753     // {DecimalDigits}
754     // {DecimalDigits,}
755     // {DecimalDigits,DecimalDigits}
756     Advance();
757     *pmin = ParseDecimalDigits();
758     *pmax = *pmin;
759     switch (c0_) {
760         case ',': {
761             Advance();
762             if (c0_ == '}') {
763                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
764                 PrintF("QuantifierPrefix{DecimalDigits,}\n");
765                 *pmax = INT32_MAX;
766                 Advance();
767             } else {
768                 *pmax = ParseDecimalDigits();
769                 if (c0_ == '}') {
770                     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
771                     PrintF("QuantifierPrefix{DecimalDigits,DecimalDigits}\n");
772                     Advance();
773                 } else {
774                     return false;
775                 }
776             }
777             break;
778         }
779         case '}':
780             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
781             PrintF("QuantifierPrefix{DecimalDigits}\n");
782             Advance();
783             break;
784         default:
785             Advance();
786             return false;
787     }
788     return true;
789 }
790 
ParseQuantifier(size_t atomBcStart,int captureStart,int captureEnd)791 void RegExpParser::ParseQuantifier(size_t atomBcStart, int captureStart, int captureEnd)
792 {
793     int min = -1;
794     int max = -1;
795     bool isGreedy = true;
796     switch (c0_) {
797         case '*':
798             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
799             PrintF("QuantifierPrefix %c\n", c0_);
800             min = 0;
801             max = INT32_MAX;
802             Advance();
803             break;
804         case '+':
805             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
806             PrintF("QuantifierPrefix %c\n", c0_);
807             min = 1;
808             max = INT32_MAX;
809             Advance();
810             break;
811         case '?':
812             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
813             PrintF("QuantifierPrefix %c\n", c0_);
814             Advance();
815             min = 0;
816             max = 1;
817             break;
818         case '{': {
819             uint8_t *start = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
820             if (!ParserIntervalQuantifier(&min, &max)) {
821                 pc_ = start;
822                 Advance();  // back to '{'
823                 return;
824             }
825             if (min > max) {
826                 ParseError("Invalid repetition count");
827                 return;
828             }
829             break;
830         }
831         default:
832             break;
833     }
834     if (c0_ == '?') {
835         isGreedy = false;
836         // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
837         PrintF("Quantifier::QuantifierPrefix?\n");
838         Advance();
839     } else if (c0_ == '?' || c0_ == '+' || c0_ == '*' || c0_ == '{') {
840         ParseError("nothing to repeat");
841         return;
842     }
843 
844     if (max == 0) {
845         buffer_.size_ = atomBcStart; // Drop all unnecessary bytecode
846     } else if (min != -1 && max != -1 && !isEmpty_) {
847         bool isLoopOp = false;
848         size_t checkCharPara = SIZE_MAX;
849 
850         if (captureStart != 0) {
851             SaveResetOpCode saveResetOp;
852             saveResetOp.InsertOpCode(&buffer_, atomBcStart, captureStart, captureEnd);
853         }
854 
855         // zero advance check
856         uint8_t firstOp = buffer_.GetU8(atomBcStart);
857         if (max == INT32_MAX && firstOp != RegExpOpCode::OP_CHAR && firstOp != RegExpOpCode::OP_CHAR32 &&
858                                 firstOp != RegExpOpCode::OP_RANGE && firstOp != RegExpOpCode::OP_RANGE32 &&
859                                 firstOp != RegExpOpCode::OP_ALL && firstOp != RegExpOpCode::OP_DOTS &&
860                                 firstOp != RegExpOpCode::OP_SPARSE) {
861             stackCount_++;
862             PushCharOpCode pushCharOp;
863             pushCharOp.InsertOpCode(&buffer_, atomBcStart);
864             CheckCharOpCode checkCharOp;
865             checkCharPara = buffer_.GetSize() + 1;
866             // NOLINTNEXTLINE(readability-magic-numbers)
867             checkCharOp.EmitOpCode(&buffer_, 0);
868         }
869 
870         if (min <= 1 && max == INT32_MAX) {
871             if (checkCharPara != SIZE_MAX) {
872                 buffer_.PutU32(checkCharPara, RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_SPLIT_NEXT)->GetSize());
873             }
874             if (isGreedy) {
875                 SplitFirstOpCode splitOp;
876                 splitOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - splitOp.GetSize());
877             } else {
878                 SplitNextOpCode splitOp;
879                 splitOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - splitOp.GetSize());
880             }
881         } else if (max > 1) {
882             if (checkCharPara != SIZE_MAX) {
883                 buffer_.PutU32(checkCharPara, RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_LOOP)->GetSize());
884             }
885             if (isGreedy) {
886                 LoopGreedyOpCode loopOp;
887                 loopOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - loopOp.GetSize(), min, max);
888                 isLoopOp = true;
889             } else {
890                 LoopOpCode loopOp;
891                 loopOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - loopOp.GetSize(), min, max);
892                 isLoopOp = true;
893             }
894         }
895 
896         if (min == 0) {
897             if (isGreedy) {
898                 SplitNextOpCode splitNextOp;
899                 splitNextOp.InsertOpCode(&buffer_, atomBcStart, buffer_.GetSize() - atomBcStart);
900             } else {
901                 SplitFirstOpCode splitFirstOp;
902                 splitFirstOp.InsertOpCode(&buffer_, atomBcStart, buffer_.GetSize() - atomBcStart);
903             }
904         }
905         if (isLoopOp) {
906             stackCount_++;
907             PushOpCode pushOp;
908             pushOp.InsertOpCode(&buffer_, atomBcStart);
909             PopOpCode popOp;
910             popOp.EmitOpCode(&buffer_);
911         }
912     }
913     isEmpty_ = false;
914 }
915 
ParseGroupSpecifier(const uint8_t ** pp,CString & name)916 bool RegExpParser::ParseGroupSpecifier(const uint8_t **pp, CString &name)
917 {
918     const uint8_t *p = *pp;
919     uint32_t c = 0;
920     char buffer[CACHE_SIZE] = {0};
921     char *q = buffer;
922     while (true) {
923         if (p <= end_) {
924             c = *p;
925         } else {
926             c = KEY_EOF;
927         }
928         if (c == '\\') {
929             p++;
930             if (*p != 'u') {
931                 return false;
932             }
933             if (!ParseUnicodeEscape(&c)) {
934                 return false;
935             }
936         } else if (c == '>') {
937             break;
938         } else if (c > CACHE_SIZE && c != KEY_EOF) {
939             c = static_cast<uint32_t>(base::StringHelper::UnicodeFromUtf8(p, UTF8_CHAR_LEN_MAX, &p));
940         } else if (c != KEY_EOF) {
941             p++;
942         } else {
943             return false;
944         }
945         if (q == buffer) {
946             if (!IsIdentFirst(c)) {
947                 return false;
948             }
949         } else {
950             if (!u_isIDPart(c)) {
951                 return false;
952             }
953         }
954         if (q != nullptr) {
955             *q++ = c;
956         }
957     } // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
958     p++;
959     *pp = p;
960     name = buffer;
961     return true;
962 }
963 
ParseCaptureCount(const char * groupName)964 int RegExpParser::ParseCaptureCount(const char *groupName)
965 {
966     const uint8_t *p = nullptr;
967     int captureIndex = 1;
968     CString name;
969     hasNamedCaptures_ = 0;
970     for (p = base_; p < end_; p++) {  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
971         switch (*p) {
972             case '(': {
973                 if (p[1] == '?') {  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
974                     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
975                     if (p[CAPTURE_CONUT_ADVANCE - 1] == '<' && p[CAPTURE_CONUT_ADVANCE] != '!' &&
976                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
977                         p[CAPTURE_CONUT_ADVANCE] != '=') {
978                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
979                         hasNamedCaptures_ = 1;
980                         p += CAPTURE_CONUT_ADVANCE;
981                         if (groupName != nullptr) {
982                             if (ParseGroupSpecifier(&p, name)) {
983                                 if (strcmp(name.c_str(), groupName) == 0) {
984                                     return captureIndex;
985                                 }
986                             }
987                         }
988                         captureIndex++;
989                     }
990                 } else {
991                     captureIndex++;
992                 }
993                 break;
994             }
995             case '\\':
996                 p++;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
997                 break;
998             case '[': {
999                 while (p < end_ && *p != ']') {
1000                     if (*p == '\\') {
1001                         p++;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1002                     }
1003                     p++;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1004                 }
1005                 break;
1006             }
1007             default:
1008                 break;
1009         }
1010     }
1011     return captureIndex;
1012 }
1013 
1014 // NOLINTNEXTLINE(readability-function-size)
ParseAtomEscape(bool isBackward)1015 int RegExpParser::ParseAtomEscape(bool isBackward)
1016 {
1017     // AtomEscape[U, N]::
1018     //     DecimalEscape
1019     //     CharacterClassEscape[?U]
1020     //     CharacterEscape[?U]
1021     //     [+N]kGroupName[?U]
1022     int result = -1;
1023     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1024     PrintF("Parse AtomEscape------\n");
1025     PrevOpCode prevOp;
1026     switch (c0_) {
1027         case KEY_EOF:
1028             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1029             ParseError("unexpected end");
1030             break;
1031         // DecimalEscape
1032         case '1':
1033         case '2':
1034         case '3':
1035         case '4':
1036         case '5':
1037         case '6':
1038         case '7':
1039         case '8':
1040         case '9': {
1041             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1042             PrintF("NonZeroDigit %c\n", c0_);
1043             int capture = ParseDecimalDigits();
1044             if (capture > captureCount_ - 1 && capture > ParseCaptureCount(nullptr) - 1) {
1045                 ParseError("invalid backreference count");
1046                 break;
1047             }
1048             if (isBackward) {
1049                 BackwardBackReferenceOpCode backReferenceOp;
1050                 backReferenceOp.EmitOpCode(&buffer_, capture);
1051             } else {
1052                 BackReferenceOpCode backReferenceOp;
1053                 backReferenceOp.EmitOpCode(&buffer_, capture);
1054             }
1055             break;
1056         }
1057         // CharacterClassEscape
1058         case 'd': {
1059             // [0-9]
1060             RangeOpCode rangeOp;
1061             if (isBackward) {
1062                 prevOp.EmitOpCode(&buffer_, 0);
1063             }
1064             rangeOp.InsertOpCode(&buffer_, g_rangeD);
1065             goto parseLookBehind;
1066         }
1067         case 'D': {
1068             // [^0-9]
1069             RangeSet atomRange(g_rangeD);
1070             atomRange.Invert(IsUtf16());
1071             Range32OpCode rangeOp;
1072             if (isBackward) {
1073                 prevOp.EmitOpCode(&buffer_, 0);
1074             }
1075             rangeOp.InsertOpCode(&buffer_, atomRange);
1076             goto parseLookBehind;
1077         }
1078         case 's': {
1079             // [\f\n\r\t\v]
1080             RangeOpCode rangeOp;
1081             if (isBackward) {
1082                 prevOp.EmitOpCode(&buffer_, 0);
1083             }
1084             rangeOp.InsertOpCode(&buffer_, g_rangeS);
1085             goto parseLookBehind;
1086         }
1087         case 'S': {
1088             RangeSet atomRange(g_rangeS);
1089             Range32OpCode rangeOp;
1090             atomRange.Invert(IsUtf16());
1091             if (isBackward) {
1092                 prevOp.EmitOpCode(&buffer_, 0);
1093             }
1094             rangeOp.InsertOpCode(&buffer_, atomRange);
1095             goto parseLookBehind;
1096         }
1097         case 'w': {
1098             // [A-Za-z0-9]
1099             RangeOpCode rangeOp;
1100             if (isBackward) {
1101                 prevOp.EmitOpCode(&buffer_, 0);
1102             }
1103             rangeOp.InsertOpCode(&buffer_, g_rangeW);
1104             goto parseLookBehind;
1105         }
1106         case 'W': {
1107             // [^A-Za-z0-9]
1108             RangeSet atomRange(g_rangeW);
1109             atomRange.Invert(IsUtf16());
1110             Range32OpCode rangeOp;
1111             if (isBackward) {
1112                 prevOp.EmitOpCode(&buffer_, 0);
1113             }
1114             rangeOp.InsertOpCode(&buffer_, atomRange);
1115             goto parseLookBehind;
1116         }
1117         case 'P':
1118         case 'p': {
1119             //CharacterClassStrings
1120             RangeSet atomRange;
1121             Range32OpCode rangeOp;
1122             ParseClassEscape(&atomRange);
1123             if (isBackward) {
1124                 prevOp.EmitOpCode(&buffer_, 0);
1125             }
1126             rangeOp.InsertOpCode(&buffer_, atomRange);
1127             break;
1128         }
1129         // [+N]kGroupName[?U]
1130         case 'k': {
1131             Advance();
1132             if (c0_ != '<') {
1133                 if (!IsUtf16() || HasNamedCaptures()) {
1134                     ParseError("expecting group name.");
1135                     break;
1136                 }
1137             }
1138             Advance();
1139             Prev();
1140             CString name;
1141             auto **pp = const_cast<const uint8_t **>(&pc_);
1142             if (!ParseGroupSpecifier(pp, name)) {
1143                 ParseError("GroupName Syntax error.");
1144                 break;
1145             }
1146             int postion = FindGroupName(name);
1147             if (postion < 0) {
1148                 postion = ParseCaptureCount(name.c_str());
1149                 if (postion < 0 && (!IsUtf16() || HasNamedCaptures())) {
1150                     ParseError("group name not defined");
1151                     break;
1152                 }
1153             }
1154             if (isBackward) {
1155                 BackwardBackReferenceOpCode backReferenceOp;
1156                 backReferenceOp.EmitOpCode(&buffer_, postion);
1157             } else {
1158                 BackReferenceOpCode backReferenceOp;
1159                 backReferenceOp.EmitOpCode(&buffer_, postion);
1160             }
1161             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1162             Advance();
1163             break;
1164         }
1165         parseLookBehind: {
1166             if (isBackward) {
1167                 prevOp.EmitOpCode(&buffer_, 0);
1168             }
1169             Advance();
1170             break;
1171         }
1172         default:
1173             result = ParseCharacterEscape();
1174             break;
1175     }
1176     return result;
1177 }
1178 
RecountCaptures()1179 int RegExpParser::RecountCaptures()
1180 {
1181     if (totalCaptureCount_ < 0) {
1182         const char *name = reinterpret_cast<const char*>(groupNames_.buf_);
1183         totalCaptureCount_ = ParseCaptureCount(name);
1184     }
1185     return totalCaptureCount_;
1186 }
HasNamedCaptures()1187 bool RegExpParser::HasNamedCaptures()
1188 {
1189     if (hasNamedCaptures_ < 0) {
1190         RecountCaptures();
1191     }
1192     return false;
1193 }
1194 
ParseCharacterEscape()1195 int RegExpParser::ParseCharacterEscape()
1196 {
1197     // CharacterEscape[U]::
1198     //     ControlEscape
1199     //     c ControlLetter
1200     //     0 [lookahead ? DecimalDigit]
1201     //     HexEscapeSequence
1202     //     RegExpUnicodeEscapeSequence[?U]
1203     //     IdentityEscape[?U]
1204     uint32_t result = 0;
1205     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1206     switch (c0_) {
1207         // ControlEscape
1208         case 'f':
1209             result = '\f';
1210             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1211             PrintF("ControlEscape %c\n", c0_);
1212             Advance();
1213             break;
1214         case 'n':
1215             result = '\n';
1216             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1217             PrintF("ControlEscape %c\n", c0_);
1218             Advance();
1219             break;
1220         case 'r':
1221             result = '\r';
1222             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1223             PrintF("ControlEscape %c\n", c0_);
1224             Advance();
1225             break;
1226         case 't':
1227             result = '\t';
1228             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1229             PrintF("ControlEscape %c\n", c0_);
1230             Advance();
1231             break;
1232         case 'v':
1233             result = '\v';
1234             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1235             PrintF("ControlEscape %c\n", c0_);
1236             Advance();
1237             break;
1238         // c ControlLetter
1239         case 'c': {
1240             Advance();
1241             if ((c0_ >= 'A' && c0_ <= 'Z') || (c0_ >= 'a' && c0_ <= 'z')) {
1242                 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1243                 PrintF("ControlLetter %c\n", c0_);
1244                 result = static_cast<uint32_t>(c0_) & 0x1f;  // NOLINTNEXTLINE(readability-magic-numbers)
1245                 Advance();
1246             } else {
1247                 if (!IsUtf16()) {
1248                     pc_--;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1249                     result = '\\';
1250                 } else {
1251                     ParseError("Invalid control letter");
1252                     return -1;
1253                 }
1254             }
1255             break;
1256         }
1257         case '0': {
1258             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1259             PrintF("CharacterEscape 0 [lookahead ? DecimalDigit]\n");
1260             if (IsUtf16() && !(*pc_ >= '0' && *pc_ <= '9')) {  // NOLINTNEXTLINE(readability-magic-numbers)
1261                 Advance();
1262                 result = 0;
1263                 break;
1264             }
1265             [[fallthrough]];
1266         }
1267         case '1':
1268         case '2':
1269         case '3':
1270         case '4':
1271         case '5':
1272         case '6':
1273         case '7': {
1274             if (IsUtf16()) {
1275                 // With /u, decimal escape is not interpreted as octal character code.
1276                 ParseError("Invalid class escape");
1277                 return 0;
1278             }
1279             result = ParseOctalLiteral();
1280             break;
1281         }
1282         // ParseHexEscapeSequence
1283         // ParseRegExpUnicodeEscapeSequence
1284         case 'x': {
1285             Advance();
1286             if (ParseHexEscape(UNICODE_HEX_ADVANCE, &result)) {
1287                 return result;
1288             }
1289             if (IsUtf16()) {
1290                 ParseError("Invalid class escape");
1291                 return -1;
1292             }
1293             result = 'x';
1294             break;
1295         }
1296         case 'u': {
1297             Advance();
1298             if (ParseUnicodeEscape(&result)) {
1299                 return result;
1300             }
1301             if (IsUtf16()) {
1302                 // With /u, invalid escapes are not treated as identity escapes.
1303                 ParseError("Invalid unicode escape");
1304                 return 0;
1305             }
1306             // If \u is not followed by a two-digit hexadecimal, treat it
1307             // as an identity escape.
1308             result = 'u';
1309             break;
1310         }
1311         // IdentityEscape[?U]
1312         case '$':
1313         case '(':
1314         case ')':
1315         case '*':
1316         case '+':
1317         case '.':
1318         case '/':
1319         case '?':
1320         case '[':
1321         case '\\':
1322         case ']':
1323         case '^':
1324         case '{':
1325         case '|':
1326         case '}':
1327             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1328             PrintF("IdentityEscape %c\n", c0_);
1329             result = c0_;
1330             Advance();
1331             break;
1332         default: {
1333             if (IsUtf16()) {
1334                 ParseError("Invalid unicode escape");
1335                 return 0;
1336             }
1337             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1338             PrintF("SourceCharacter %c\n", c0_);
1339             result = c0_;
1340             if (result < CHAR_MAXS) {
1341                 Advance();
1342             } else {
1343                 Prev();
1344                 const uint8_t *p = pc_;
1345                 result = static_cast<uint32_t>(base::StringHelper::UnicodeFromUtf8(p, UTF8_CHAR_LEN_MAX, &p));
1346                 int offset = static_cast<int>(p - pc_);
1347                 Advance(offset + 1);
1348             }
1349             break;
1350         }
1351     }
1352     return static_cast<int>(result);
1353 }
1354 
ParseClassRanges(RangeSet * result)1355 bool RegExpParser::ParseClassRanges(RangeSet *result)
1356 {
1357     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1358     PrintF("Parse ClassRanges------\n");
1359     while (c0_ != ']') {
1360         RangeSet s1;
1361         bool needInter = false;
1362         uint32_t c1 = ParseClassAtom(&s1);
1363         if (c1 == UINT32_MAX) {
1364             ParseError("invalid class range");
1365             return false;
1366         }
1367         needInter = NeedIntersection(c1);
1368         int next_c0 = *pc_;
1369         if (c0_ == '-' && next_c0 != ']') {
1370             if (c1 == CLASS_RANGE_BASE) {
1371                 if (IsUtf16()) {
1372                     ParseError("invalid class range");
1373                     return false;
1374                 }
1375                 result->Insert(s1);
1376                 continue;
1377             }
1378             Advance();
1379             RangeSet s2;
1380             uint32_t c2 = ParseClassAtom(&s2);
1381             if (c2 == UINT32_MAX) {
1382                 ParseError("invalid class range");
1383                 return false;
1384             }
1385             if (c2 == CLASS_RANGE_BASE) {
1386                 if (IsUtf16()) {
1387                     ParseError("invalid class range");
1388                     return false;
1389                 }
1390                 result->Insert(s2);
1391                 continue;
1392             }
1393             if (c1 < INT8_MAX) {
1394                 if (c1 > c2) {
1395                     ParseError("invalid class range");
1396                     return false;
1397                 }
1398             }
1399             needInter = NeedIntersection(c2);
1400             result->Insert(c1, c2);
1401             if (IsIgnoreCase() && needInter) {
1402                 ProcessIntersection(result);
1403             }
1404         } else {
1405             result->Insert(s1);
1406             if (!(IsIgnoreCase() && needInter)) {
1407                 continue;
1408             }
1409             if (c1 <= 'z' && c1 >= 'a') {
1410                 result->Insert(RangeSet(c1 - 'a' + 'A'));
1411             } else {
1412                 result->Insert(RangeSet(c1 - 'A' + 'a'));
1413             }
1414         }
1415     }
1416     Advance();
1417     return true;
1418 }
1419 
ParseClassAtom(RangeSet * atom)1420 uint32_t RegExpParser::ParseClassAtom(RangeSet *atom)
1421 {
1422     uint32_t ret = UINT32_MAX;
1423     switch (c0_) {
1424         case '\\': {
1425             Advance();
1426             ret = static_cast<uint32_t>(ParseClassEscape(atom));
1427             break;
1428         }
1429         case KEY_EOF:
1430             break;
1431         case 0: {
1432             if (pc_ >= end_) {
1433                 return UINT32_MAX;
1434             }
1435             [[fallthrough]];
1436         }
1437         default: {
1438             uint32_t value = c0_;
1439             size_t u16_size = 0;
1440             if (c0_ > INT8_MAX) {  // NOLINTNEXTLINE(readability-magic-numbers)
1441                 pc_ -= 1;          // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1442                 auto u16_result = base::utf_helper::ConvertUtf8ToUtf16Pair(pc_, true);
1443                 value = u16_result.first;
1444                 u16_size = u16_result.second;
1445                 Advance(u16_size + 1);
1446             } else {
1447                 Advance();
1448             }
1449             atom->Insert(RangeSet(value));
1450             ret = value;
1451             break;
1452         }
1453     }
1454     return ret;
1455 }
1456 
ParseClassEscape(RangeSet * atom)1457 int RegExpParser::ParseClassEscape(RangeSet *atom)
1458 {
1459     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1460     PrintF("Parse ClassEscape------\n");
1461     int result = -1;
1462     switch (c0_) {
1463         case 'b':
1464             Advance();
1465             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1466             PrintF("ClassEscape %c", 'b');
1467             result = '\b';
1468             atom->Insert(RangeSet(static_cast<uint32_t>('\b')));
1469             break;
1470         case '-':
1471             Advance();
1472             result = '-';
1473             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1474             PrintF("ClassEscape %c", '-');
1475             atom->Insert(RangeSet(static_cast<uint32_t>('-')));
1476             break;
1477         // CharacterClassEscape
1478         case 'd':
1479         case 'D':
1480             result = CLASS_RANGE_BASE;
1481             atom->Insert(g_rangeD);
1482             if (c0_ == 'D') {
1483                 atom->Invert(IsUtf16());
1484             }
1485             Advance();
1486             break;
1487         case 's':
1488         case 'S':
1489             result = CLASS_RANGE_BASE;
1490             atom->Insert(g_rangeS);
1491             if (c0_ == 'S') {
1492                 atom->Invert(IsUtf16());
1493             }
1494             Advance();
1495             break;
1496         case 'w':
1497         case 'W':
1498             // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1499             PrintF("ClassEscape::CharacterClassEscape %c\n", c0_);
1500             result = CLASS_RANGE_BASE;
1501             atom->Insert(g_rangeW);
1502             if (c0_ == 'W') {
1503                 atom->Invert(IsUtf16());
1504             }
1505             Advance();
1506             break;
1507         case 'P':
1508         case 'p': {
1509             bool negate = (c0_ == 'P');
1510             CString propertyName;
1511             CString valueName;
1512             if (!ParseUnicodePropertyValueCharacters(propertyName, valueName) ||
1513                 !ParseUnicodePropertyClassRange(propertyName, valueName, atom, negate)) {
1514                 CString msg = "Invalid regular expression of unicode";
1515                 ParseError(msg.c_str());
1516             }
1517             result = CLASS_RANGE_BASE;
1518             break;
1519         }
1520         default:
1521             result = ParseCharacterEscape();
1522             int value = result;
1523             if (IsIgnoreCase()) {
1524                 value = Canonicalize(value, IsUtf16());
1525             }
1526             atom->Insert(RangeSet(static_cast<uint32_t>(value)));
1527             break;
1528     }
1529     return result;
1530 }
1531 
ParseUnicodePropertyValueCharacters(CString & propertyName,CString & valueName)1532 bool RegExpParser::ParseUnicodePropertyValueCharacters(CString &propertyName, CString &valueName)
1533 {
1534     Advance();
1535     if (c0_ == '{') {
1536         if (!GetUnicodePropertyName(propertyName)) {
1537             return false;
1538         }
1539 
1540         if (!GetUnicodePropertyValueName(valueName)) {
1541             return false;
1542         }
1543     } else {
1544         return false;
1545     }
1546     Advance();
1547     return true;
1548 }
1549 
GetUnicodePropertyName(CString & propertyName)1550 bool RegExpParser::GetUnicodePropertyName(CString &propertyName)
1551 {
1552     Advance();
1553     while (c0_ != '}' && c0_ != '=') {
1554         if (IsUnicodePropertyValueCharacter(c0_)) {
1555             propertyName += c0_;
1556         } else {
1557             return false;
1558         }
1559         Advance();
1560     }
1561     return true;
1562 }
1563 
GetUnicodePropertyValueName(CString & valueName)1564 bool RegExpParser::GetUnicodePropertyValueName(CString &valueName)
1565 {
1566     if (c0_ == '=') {
1567         Advance();
1568         while (c0_ != '}') {
1569             if (IsUnicodePropertyValueCharacter(c0_)) {
1570                 valueName += c0_;
1571             } else {
1572                 return false;
1573             }
1574             Advance();
1575         }
1576     }
1577     return true;
1578 }
1579 
1580 // NOLINTNEXTLINE(cert-dcl50-cpp)
PrintF(const char * fmt,...)1581 void RegExpParser::PrintF(const char *fmt, ...)
1582 {
1583 #ifndef _NO_DEBUG_
1584     va_list args;
1585     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,)
1586     va_start(args, fmt);
1587     vprintf(fmt, args);
1588     va_end(args);
1589 #else
1590     (void)fmt;
1591 #endif
1592 }
1593 
ParseError(const char * errorMessage)1594 void RegExpParser::ParseError(const char *errorMessage)
1595 {
1596     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1597     PrintF("error: ");
1598     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1599     PrintF(errorMessage);
1600     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1601     PrintF("\n");
1602     SetIsError();
1603     size_t length = strlen(errorMessage) + 1;
1604     if (memcpy_s(errorMsg_, length, errorMessage, length) != EOK) {
1605         LOG_FULL(FATAL) << "memcpy_s failed";
1606         UNREACHABLE();
1607     }
1608 }
1609 
IsIdentFirst(uint32_t c)1610 int RegExpParser::IsIdentFirst(uint32_t c)
1611 {
1612     if (c < CACHE_SIZE) {
1613         return (ID_START_TABLE_ASCII[c >> 5] >> (c & 31)) & 1; // 5: Shift five bits 31: and operation binary of 31
1614     } else {
1615         auto uchar = static_cast<UChar32>(c);
1616         return static_cast<int>(u_isIDStart(uchar));
1617     }
1618 }
1619 
Canonicalize(int c,bool isUnicode)1620 int RegExpParser::Canonicalize(int c, bool isUnicode)
1621 {
1622     if (c < TMP_BUF_SIZE) {  // NOLINTNEXTLINE(readability-magic-numbers)
1623         if (c >= 'a' && c <= 'z') {
1624             c = c - 'a' + 'A';
1625         }
1626     } else {
1627         int cur = c;
1628         if (isUnicode) {
1629             c = u_tolower(static_cast<UChar32>(c));
1630             if (c >= 'a' && c <= 'z') {
1631                 c = cur;
1632             }
1633         } else {
1634             c = u_toupper(static_cast<UChar32>(c));
1635             if (c >= 'A' && c <= 'Z') {
1636                 c = cur;
1637             }
1638         }
1639     }
1640     return c;
1641 }
1642 
NeedIntersection(uint32_t c)1643 bool RegExpParser::NeedIntersection(uint32_t c)
1644 {
1645     return (c <= 'z' && c >= 'a') || (c <= 'Z' && c >= 'A');
1646 }
1647 
DoParserStackOverflowCheck(const char * errorMessage)1648 void RegExpParser::DoParserStackOverflowCheck(const char *errorMessage)
1649 {
1650     if (UNLIKELY(thread_->GetCurrentStackPosition() < thread_->GetStackLimit())) {
1651         LOG_ECMA(ERROR) << "Stack overflow! current:" << thread_->GetCurrentStackPosition() <<
1652             " limit:" << thread_->GetStackLimit();
1653         ParseError(errorMessage);
1654         return;
1655     }
1656 }
1657 
ParseUnicodePropertyClassRange(CString & propertyName,CString & valueName,RangeSet * atom,bool negate)1658 bool RegExpParser::ParseUnicodePropertyClassRange(CString &propertyName, CString &valueName,
1659                                                   RangeSet *atom, bool negate)
1660 {
1661     const char *name = propertyName.c_str();
1662     if (valueName.size() == 0) {
1663         if (MatchUnicodeProperty(UCHAR_GENERAL_CATEGORY_MASK, name, atom, negate)) {
1664             return true;
1665         }
1666         if (MatchSepcialUnicodeProperty(propertyName, negate, atom)) {
1667             return true;
1668         }
1669         UProperty property = u_getPropertyEnum(name);
1670         if (!IsSupportedBinaryProperty(property)) {
1671             return false;
1672         }
1673         if (!IsExactPropertyAlias(name, property)) {
1674             return false;
1675         }
1676         if (negate && IsBinaryPropertyOfStrings(property)) {
1677             return false;
1678         }
1679         return MatchUnicodeProperty(property, negate ? "N" : "Y", atom, false);
1680     } else {
1681         UProperty property = u_getPropertyEnum(propertyName.c_str());
1682         if (property == UCHAR_GENERAL_CATEGORY) {
1683             property = UCHAR_GENERAL_CATEGORY_MASK;
1684         } else if (property != UCHAR_SCRIPT && property != UCHAR_SCRIPT_EXTENSIONS) {
1685             return false;
1686         }
1687         return MatchUnicodeProperty(property, valueName.c_str(), atom, negate);
1688     }
1689 }
1690 
MatchUnicodeProperty(UProperty property,const char * propertyName,RangeSet * atom,bool negate)1691 bool RegExpParser::MatchUnicodeProperty(UProperty property, const char* propertyName, RangeSet *atom, bool negate)
1692 {
1693     UProperty propertyForMatch = property;
1694     if (propertyForMatch == UCHAR_SCRIPT_EXTENSIONS) {
1695         propertyForMatch = UCHAR_SCRIPT;
1696     }
1697     int32_t propertyValue = u_getPropertyValueEnum(propertyForMatch, propertyName);
1698     if (propertyValue == UCHAR_INVALID_CODE) {
1699         return false;
1700     }
1701     if (!IsExactPropertyValueAlis(propertyName, propertyForMatch, propertyValue)) {
1702         return false;
1703     }
1704     UErrorCode ec = U_ZERO_ERROR;
1705     icu::UnicodeSet set;
1706     set.applyIntPropertyValue(property, propertyValue, ec);
1707     bool success = ec == U_ZERO_ERROR && !set.isEmpty();
1708     if (success) {
1709         const bool caseFolding = IsIgnoreCase();
1710         if (negate) {
1711             set.complement();
1712         }
1713         if (caseFolding) {
1714             set.closeOver(USET_CASE_INSENSITIVE);
1715         }
1716         set.removeAllStrings();
1717         for (int i = 0; i < set.getRangeCount(); i++) {
1718             atom->Insert(set.getRangeStart(i),  set.getRangeEnd(i));
1719         }
1720     }
1721     return success;
1722 }
1723 
IsExactPropertyValueAlis(const char * valueName,UProperty property,int32_t propertyValue)1724 bool RegExpParser::IsExactPropertyValueAlis(const char *valueName, UProperty property, int32_t propertyValue)
1725 {
1726     const char *shortName = u_getPropertyValueName(property, propertyValue, U_SHORT_PROPERTY_NAME);
1727     if (shortName != nullptr && strcmp(valueName, shortName) == 0) {
1728         return true;
1729     }
1730     int i = 0;
1731     bool flag = true;
1732     while (flag) {
1733         const char *longName = u_getPropertyValueName(property, propertyValue,
1734             static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
1735         if (longName == nullptr) {
1736             flag = false;
1737             break;
1738         }
1739         if (strcmp(valueName, longName) == 0) {
1740             return true;
1741         }
1742         i++;
1743     }
1744     return false;
1745 }
1746 
IsExactPropertyAlias(const char * propertyName,UProperty property)1747 bool RegExpParser::IsExactPropertyAlias(const char* propertyName, UProperty property)
1748 {
1749     const char* shortName = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
1750     if (shortName != nullptr && strcmp(propertyName, shortName) == 0) {
1751         return true;
1752     }
1753     int i = 0;
1754     bool flag = true;
1755     while (flag) {
1756         const char* longName = u_getPropertyName(property,
1757             static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
1758         if (longName == nullptr) {
1759             flag = false;
1760             break;
1761         }
1762         if (strcmp(propertyName, longName) == 0) {
1763             return true;
1764         }
1765         i++;
1766     }
1767     return false;
1768 }
1769 
MatchSepcialUnicodeProperty(CString & name,bool negate,RangeSet * atom)1770 bool RegExpParser::MatchSepcialUnicodeProperty(CString &name, bool negate, RangeSet *atom)
1771 {
1772     if (name == "Any") {
1773         if (!negate) {
1774             atom->Insert(0, 0x10FFFF);
1775         }
1776     } else if (name == "ASCII") {
1777         if (negate) {
1778             atom->Insert(0x80, 0x10FFFF);
1779         } else {
1780             atom->Insert(0x0, 0x7F);
1781         }
1782     } else if (name == "Assigned") {
1783         return MatchUnicodeProperty(UCHAR_GENERAL_CATEGORY, "Unassigned", atom, !negate);
1784     } else {
1785         return false;
1786     }
1787     return true;
1788 }
1789 
IsSupportedBinaryProperty(UProperty property)1790 bool RegExpParser::IsSupportedBinaryProperty(UProperty property)
1791 {
1792     switch (property) {
1793         case UCHAR_ALPHABETIC:
1794         case UCHAR_ASCII_HEX_DIGIT:
1795         case UCHAR_BIDI_CONTROL:
1796         case UCHAR_BIDI_MIRRORED:
1797         case UCHAR_DASH:
1798         case UCHAR_DEFAULT_IGNORABLE_CODE_POINT:
1799         case UCHAR_DEPRECATED:
1800         case UCHAR_DIACRITIC:
1801         case UCHAR_JOIN_CONTROL:
1802         case UCHAR_IDS_TRINARY_OPERATOR:
1803         case UCHAR_IDS_BINARY_OPERATOR:
1804         case UCHAR_IDEOGRAPHIC:
1805         case UCHAR_S_TERM:
1806         case UCHAR_ID_START:
1807         case UCHAR_ID_CONTINUE:
1808         case UCHAR_HEX_DIGIT:
1809         case UCHAR_GRAPHEME_EXTEND:
1810         case UCHAR_GRAPHEME_BASE:
1811         case UCHAR_EXTENDER:
1812         case UCHAR_LOGICAL_ORDER_EXCEPTION:
1813         case UCHAR_LOWERCASE:
1814         case UCHAR_MATH:
1815         case UCHAR_NONCHARACTER_CODE_POINT:
1816         case UCHAR_QUOTATION_MARK:
1817         case UCHAR_RADICAL:
1818         case UCHAR_SOFT_DOTTED:
1819         case UCHAR_TERMINAL_PUNCTUATION:
1820         case UCHAR_UNIFIED_IDEOGRAPH:
1821         case UCHAR_UPPERCASE:
1822         case UCHAR_WHITE_SPACE:
1823         case UCHAR_XID_CONTINUE:
1824         case UCHAR_XID_START:
1825         case UCHAR_VARIATION_SELECTOR:
1826         case UCHAR_PATTERN_SYNTAX:
1827         case UCHAR_PATTERN_WHITE_SPACE:
1828         case UCHAR_CASED:
1829         case UCHAR_CASE_IGNORABLE:
1830         case UCHAR_CHANGES_WHEN_LOWERCASED:
1831         case UCHAR_CHANGES_WHEN_UPPERCASED:
1832         case UCHAR_CHANGES_WHEN_TITLECASED:
1833         case UCHAR_CHANGES_WHEN_CASEFOLDED:
1834         case UCHAR_CHANGES_WHEN_CASEMAPPED:
1835         case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED:
1836         case UCHAR_REGIONAL_INDICATOR:
1837         case UCHAR_EMOJI:
1838         case UCHAR_EMOJI_PRESENTATION:
1839         case UCHAR_EMOJI_MODIFIER:
1840         case UCHAR_EMOJI_MODIFIER_BASE:
1841         case UCHAR_EMOJI_COMPONENT:
1842         case UCHAR_EXTENDED_PICTOGRAPHIC:
1843             return true;
1844         case UCHAR_BASIC_EMOJI:
1845         case UCHAR_EMOJI_KEYCAP_SEQUENCE:
1846         case UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE:
1847         case UCHAR_RGI_EMOJI_FLAG_SEQUENCE:
1848         case UCHAR_RGI_EMOJI_TAG_SEQUENCE:
1849         case UCHAR_RGI_EMOJI_ZWJ_SEQUENCE:
1850         case UCHAR_RGI_EMOJI:
1851             return false;
1852         default:
1853             break;
1854     }
1855     return false;
1856 }
1857 
IsBinaryPropertyOfStrings(UProperty property)1858 bool RegExpParser::IsBinaryPropertyOfStrings(UProperty property)
1859 {
1860     switch (property) {
1861         case UCHAR_BASIC_EMOJI:
1862         case UCHAR_EMOJI_KEYCAP_SEQUENCE:
1863         case UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE:
1864         case UCHAR_RGI_EMOJI_FLAG_SEQUENCE:
1865         case UCHAR_RGI_EMOJI_TAG_SEQUENCE:
1866         case UCHAR_RGI_EMOJI_ZWJ_SEQUENCE:
1867         case UCHAR_RGI_EMOJI:
1868             return true;
1869         default:
1870             break;
1871     }
1872     return false;
1873 }
1874 }  // namespace panda::ecmascript
1875