1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/regexp/regexp_parser.h"
17
18 #include "ecmascript/base/string_helper.h"
19 #include "libpandabase/utils/utils.h"
20 #define _NO_DEBUG_
21
22 namespace panda::ecmascript {
// Size of the on-stack scratch buffer ParseGroupSpecifier uses for a group name; the same
// constant is also used there as the threshold above which a byte is decoded as UTF-8.
static constexpr uint32_t CACHE_SIZE = 128;
// NOTE(review): presumably the size of the ASCII range; no use is visible in this part of the file.
static constexpr uint32_t CHAR_MAXS = 128;
// 128-bit bitmap over the ASCII range: bit (c % 32) of word (c / 32) is set for '$', 'A'-'Z', '_'
// and 'a'-'z'.
static constexpr uint32_t ID_START_TABLE_ASCII[4] = {
    /* $ A-Z _ a-z */
    0x00000000, 0x00000010, 0x87FFFFFE, 0x07FFFFFE
};
// Code points '0'-'9'; ParseAtomEscape uses this set for \d and (inverted) for \D.
static RangeSet g_rangeD(0x30, 0x39);  // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
// White space and line terminator code points; ParseAtomEscape uses this set for \s and
// (inverted) for \S.
// NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static RangeSet g_rangeS({
    std::pair<uint32_t, uint32_t>(0x0009, 0x000D),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x0020, 0x0020),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x00A0, 0x00A0),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x1680, 0x1680),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x2000, 0x200A),  // NOLINTNEXTLINE(readability-magic-numbers)
    /* 2028;LINE SEPARATOR;Zl;0;WS;;;;;N;;;;; */
    /* 2029;PARAGRAPH SEPARATOR;Zp;0;B;;;;;N;;;;; */
    std::pair<uint32_t, uint32_t>(0x2028, 0x2029),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x202F, 0x202F),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x205F, 0x205F),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x3000, 0x3000),  // NOLINTNEXTLINE(readability-magic-numbers)
    /* FEFF;ZERO WIDTH NO-BREAK SPACE;Cf;0;BN;;;;;N;BYTE ORDER MARK;;;; */
    std::pair<uint32_t, uint32_t>(0xFEFF, 0xFEFF),  // NOLINTNEXTLINE(readability-magic-numbers)
});

// Word characters: '0'-'9', 'A'-'Z', '_' and 'a'-'z'; ParseAtomEscape uses this set for \w and
// (inverted) for \W.
// NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static RangeSet g_rangeW({
    std::pair<uint32_t, uint32_t>(0x0030, 0x0039),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x0041, 0x005A),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x005F, 0x005F),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x0061, 0x007A),  // NOLINTNEXTLINE(readability-magic-numbers)
});

// ASCII code points '$', 'A'-'Z' and 'a'-'z'.
// NOTE(review): '_' (0x5F) is not in this set although ID_START_TABLE_ASCII above includes it -
// confirm whether that is intended.
// NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static RangeSet g_regexpIdentifyStart({
    std::pair<uint32_t, uint32_t>(0x0024, 0x0024),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x0041, 0x005A),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x0061, 0x007A),  // NOLINTNEXTLINE(readability-magic-numbers)
});

// ASCII code points '$', '0'-'9', 'A'-'Z' and 'a'-'z' (the start set above plus the digits).
// NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
static RangeSet g_regexpIdentifyContinue({
    std::pair<uint32_t, uint32_t>(0x0024, 0x0024),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x0030, 0x0039),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x0041, 0x005A),  // NOLINTNEXTLINE(readability-magic-numbers)
    std::pair<uint32_t, uint32_t>(0x0061, 0x007A),  // NOLINTNEXTLINE(readability-magic-numbers)
});
69
Parse()70 void RegExpParser::Parse()
71 {
72 // dynbuffer head init [size,capture_count,statck_count,flags,prefilter]
73 buffer_.EmitU32(0);
74 buffer_.EmitU32(0);
75 buffer_.EmitU32(0);
76 buffer_.EmitU32(0);
77 buffer_.EmitU32(0);
78 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
79 PrintF("Parse Pattern------\n");
80 // Pattern[U, N]::
81 // Disjunction[?U, ?N]
82 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
83 Advance();
84 SaveStartOpCode saveStartOp;
85 int captureIndex = captureCount_++;
86 saveStartOp.EmitOpCode(&buffer_, captureIndex);
87 ParseDisjunction(false);
88 if (isError_) {
89 return;
90 }
91 if (c0_ != KEY_EOF) {
92 ParseError("extraneous characters at the end");
93 return;
94 }
95 SaveEndOpCode saveEndOp;
96 saveEndOp.EmitOpCode(&buffer_, captureIndex);
97 MatchEndOpCode matchEndOp;
98 matchEndOp.EmitOpCode(&buffer_, 0);
99
100 uint32_t ptr = RegExpParser::OP_START_OFFSET;
101 ptr += static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_SAVE_START)->GetSize());
102 uint8_t opCode = buffer_.GetU8(ptr);
103 uint16_t expectedChar = 0;
104 if (opCode == RegExpOpCode::OP_CHAR && !IsIgnoreCase()) {
105 expectedChar = buffer_.GetU16(ptr + 1);
106 if (expectedChar > UINT8_MAX) {
107 expectedChar = 0;
108 }
109 }
110
111 // dynbuffer head assignments
112 buffer_.PutU32(0, buffer_.size_);
113 buffer_.PutU32(NUM_CAPTURE__OFFSET, captureCount_);
114 buffer_.PutU32(NUM_STACK_OFFSET, stackCount_);
115 buffer_.PutU32(FLAGS_OFFSET, flags_);
116 buffer_.PutU32(PREFILTER_OFFSET, expectedChar);
117 #ifndef _NO_DEBUG_
118 RegExpOpCode::DumpRegExpOpCode(std::cout, buffer_, buffer_.GetSize());
119 #endif
120 }
121
// Parses Disjunction ::= Alternative ('|' Alternative)* and emits its bytecode into buffer_.
//
// isBackward is forwarded to ParseAlternative for every alternative.
//
// Code layout for "a|b|c" (each new '|' inserts a SPLIT_NEXT at `start` and appends a GOTO):
//     SPLIT_NEXT(->c) SPLIT_NEXT(->b) a GOTO b GOTO c
// While further alternatives are still pending, each GOTO's operand temporarily holds the byte
// length of the alternative in front of it (INVALID_PARA for the first GOTO), which forms a
// backward chain. After the last alternative the chain is walked and every GOTO is rewritten to
// jump to the end of the disjunction.
//
// If every alternative starts with an OP_CHAR and all of those characters are distinct, the
// leading SPLIT_NEXT ops are overwritten by one OP_SPARSE table (char -> offset entries).
//
// An immediately following ')' sets isEmpty_ and returns without emitting anything. The function
// also returns early whenever isError_ is set.
void RegExpParser::ParseDisjunction(bool isBackward)
{
    // check stack overflow because infinite recursion may occur
    DoParserStackOverflowCheck("invalid regular expression.");
    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
    PrintF("Parse Disjunction------\n");
    if (c0_ == ')') {
        isEmpty_ = true;
        return;
    }
    // Buffer position where this disjunction's bytecode begins; SPLIT_NEXT ops are inserted here.
    size_t start = buffer_.size_;
    ParseAlternative(isBackward);
    if (isError_) {
        return;
    }
    // Operand stored into the next GOTO; INVALID_PARA marks the first GOTO of the chain.
    uint32_t para = RegExpOpCode::INVALID_PARA;
    do {
        if (c0_ == '|') {
            SplitNextOpCode splitOp;
            uint32_t len = buffer_.size_ - start;
            GotoOpCode gotoOp;
            // The split must skip everything parsed so far plus the GOTO appended right below.
            splitOp.InsertOpCode(&buffer_, start, len + gotoOp.GetSize());
            // pos = buffer offset of the GOTO that was just emitted.
            uint32_t pos = gotoOp.EmitOpCode(&buffer_, 0) - gotoOp.GetSize();
            gotoOp.UpdateOpPara(&buffer_, pos, para);
            Advance();
            ParseAlternative(isBackward);
            // Byte length of the alternative that was just parsed (everything after the GOTO).
            para = buffer_.size_ - pos - gotoOp.GetSize();
            if (c0_ != '|') {
                // Last alternative reached: try the sparse-table rewrite, then patch the GOTO chain.
                uint16_t cnt = 0;
                uint32_t opCharSize =
                    static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_CHAR)->GetSize());
                uint32_t opSplitSize =
                    static_cast<uint32_t>(RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_SPLIT_NEXT)->GetSize());
                std::vector<uint16_t> chars;
                std::vector<uint32_t> offsets;
                std::set<uint16_t> checkSet;
                uint32_t ptr = start;
                bool isSparseable = true;
                // Walk the leading SPLIT_NEXT ops. Each one targets an alternative; the first
                // non-split opcode is the first alternative itself. Branches are therefore
                // collected from the last alternative to the first.
                do {
                    uint8_t opCode = buffer_.GetU8(ptr);
                    uint32_t offset = 0;
                    uint32_t branch = ptr;
                    bool isLastBranch = false;
                    if (opCode == RegExpOpCode::OP_SPLIT_NEXT) {
                        offset = buffer_.GetU32(ptr + 1);
                        branch = ptr + offset + opSplitSize;
                    } else {
                        isLastBranch = true;
                    }
                    uint8_t opCodeChar = buffer_.GetU8(branch);
                    if (opCodeChar == RegExpOpCode::OP_CHAR) {
                        chars.push_back(buffer_.GetU16(branch + 1));
                        offsets.push_back(offset);
                        // Two alternatives starting with the same character cannot share a table.
                        if (checkSet.find(chars[cnt]) != checkSet.end()) {
                            isSparseable = false;
                            break;
                        }
                        checkSet.insert(chars[cnt]);
                    } else {
                        isSparseable = false;
                        break;
                    }
                    cnt++;
                    if (isLastBranch) {
                        break;
                    }
                    ptr += opSplitSize;
                } while (true);

                if (isSparseable) {
                    // Table layout written below: OP_SPARSE (u8), entry count (u16), then one
                    // entry every SPARSE_MAX_OFFSET bytes: character (u16) and, at
                    // SPARSE_OFF_OFFSET, its offset (u32).
                    uint32_t sparseLen = SPARSE_HEAD_OFFSET + static_cast<uint32_t>(cnt) * SPARSE_MAX_OFFSET;
                    uint32_t splitsLen = static_cast<uint32_t>(cnt - 1) * opSplitSize;
                    ptr = start;
                    // The table reuses the bytes of the SPLIT_NEXT ops; only the difference is inserted.
                    buffer_.Insert(start, sparseLen - splitsLen);
                    // Keep pos pointing at the last GOTO after the insertion shifted it.
                    pos += sparseLen - splitsLen;
                    buffer_.PutU8(ptr, RegExpOpCode::OP_SPARSE);
                    buffer_.PutU16(ptr + 1, cnt);
                    ptr += SPARSE_HEAD_OFFSET;
                    ASSERT(chars.size() > 0);
                    // Entries are written in reverse collection order, i.e. first alternative first.
                    for (int32_t i = static_cast<int32_t>(chars.size() - 1); i >= 0; i--) {
                        buffer_.PutU16(ptr, chars[i]);
                        // 2: cnt = count of splits + 1, for invert index should be extra - 1, so -1-1=-2
                        offsets[i] += opCharSize - opSplitSize * std::max(0, cnt - i -2);
                        buffer_.PutU32(ptr + SPARSE_OFF_OFFSET, offsets[i]);
                        ptr += SPARSE_MAX_OFFSET;
                    }
                }
                // Walk the GOTO chain from the last GOTO back to the first: replace each stored
                // length with the distance to the end of the disjunction, then step back to the
                // previous GOTO using the length that was stored. INVALID_PARA ends the walk.
                bool isEnd = false;
                do {
                    uint32_t paraTmp = buffer_.GetU32(pos + 1);
                    if (paraTmp == RegExpOpCode::INVALID_PARA) {
                        isEnd = true;
                    }
                    buffer_.PutU32(pos + 1, para);
                    para += paraTmp + gotoOp.GetSize();
                    pos -= paraTmp + gotoOp.GetSize();
                } while (!isEnd);
            }
            // NOTE(review): an error raised by the ParseAlternative call above is only checked
            // here, after the rewrite/patch steps have already run on the buffer.
            if (isError_) {
                return;
            }
        }
    } while (c0_ != KEY_EOF && c0_ != ')');
}
226
ParseOctalLiteral()227 uint32_t RegExpParser::ParseOctalLiteral()
228 {
229 // For compatibility with some other browsers (not all), we parse
230 // up to three octal digits with a value below 256.
231 // ES#prod-annexB-LegacyOctalEscapeSequence
232 uint32_t value = c0_ - '0';
233 Advance();
234 if (c0_ >= '0' && c0_ <= '7') {
235 value = value * OCTAL_VALUE + c0_ - '0';
236 Advance();
237 if (value < OCTAL_VALUE_RANGE && c0_ >= '0' && c0_ <= '7') {
238 value = value * OCTAL_VALUE + c0_ - '0';
239 Advance();
240 }
241 }
242 return value;
243 }
244
ParseUnlimitedLengthHexNumber(uint32_t maxValue,uint32_t * value)245 bool RegExpParser::ParseUnlimitedLengthHexNumber(uint32_t maxValue, uint32_t *value)
246 {
247 uint32_t x = 0;
248 int d = static_cast<int>(HexValue(c0_));
249 if (d < 0) {
250 return false;
251 }
252 while (d >= 0) {
253 if (UNLIKELY(x > (std::numeric_limits<uint32_t>::max() - static_cast<uint32_t>(d)) / HEX_VALUE)) {
254 LOG_FULL(FATAL) << "value overflow";
255 return false;
256 }
257 x = x * HEX_VALUE + static_cast<uint32_t>(d);
258 if (x > maxValue) {
259 return false;
260 }
261 Advance();
262 d = static_cast<int>(HexValue(c0_));
263 }
264 *value = x;
265 return true;
266 }
267
268 // This parses RegExpUnicodeEscapeSequence as described in ECMA262.
ParseUnicodeEscape(uint32_t * value)269 bool RegExpParser::ParseUnicodeEscape(uint32_t *value)
270 {
271 // Accept both \uxxxx and \u{xxxxxx} (if allowed).
272 // In the latter case, the number of hex digits between { } is arbitrary.
273 // \ and u have already been read.
274 if (c0_ == '{' && IsUtf16()) {
275 uint8_t *start = pc_ - 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
276 Advance();
277 if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) { // NOLINTNEXTLINE(readability-magic-numbers)
278 if (c0_ == '}') {
279 Advance();
280 return true;
281 }
282 }
283 pc_ = start;
284 Advance();
285 return false;
286 }
287 // \u but no {, or \u{...} escapes not allowed.
288 bool result = ParseHexEscape(UNICODE_HEX_VALUE, value);
289 if (result && IsUtf16() && U16_IS_LEAD(*value) && c0_ == '\\') {
290 // Attempt to read trail surrogate.
291 uint8_t *start = pc_ - 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
292 if (*pc_ == 'u') {
293 Advance(UNICODE_HEX_ADVANCE);
294 uint32_t trail = 0;
295 if (ParseHexEscape(UNICODE_HEX_VALUE, &trail) && U16_IS_TRAIL(trail)) {
296 *value = U16_GET_SUPPLEMENTARY((*value), (trail)); // NOLINTNEXTLINE(hicpp-signed-bitwise)
297 return true;
298 }
299 }
300 pc_ = start;
301 Advance();
302 }
303 return result;
304 }
305
ParseHexEscape(int length,uint32_t * value)306 bool RegExpParser::ParseHexEscape(int length, uint32_t *value)
307 {
308 uint8_t *start = pc_ - 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
309 uint32_t val = 0;
310 for (int i = 0; i < length; ++i) {
311 uint32_t c = c0_;
312 int d = static_cast<int>(HexValue(c));
313 if (d < 0) {
314 pc_ = start;
315 Advance();
316 return false;
317 }
318 val = val * HEX_VALUE + static_cast<uint32_t>(d);
319 Advance();
320 }
321 *value = val;
322 return true;
323 }
324
325 // NOLINTNEXTLINE(readability-function-size)
// Parses one Alternative: a sequence of terms up to the next '|', ')' or end of input, and emits
// the bytecode of each term into buffer_.
//
// isBackward: when true, character-consuming atoms are emitted between two PREV ops, and after
//             each term the term's bytecode is rotated to the front of this alternative's code,
//             so the terms end up in reverse order.
//
// Each loop iteration handles one term: an assertion (^, $, \b, \B), a group or look-around
// (delegated to ParseAssertionCapture), an atom (., [...], escape, literal character) or a syntax
// error. A term that is an atom is then offered to ParseQuantifier.
void RegExpParser::ParseAlternative(bool isBackward)
{
    // Buffer position where this alternative's bytecode begins (used by the backward rotation).
    size_t start = buffer_.size_;
    while (c0_ != '|' && c0_ != KEY_EOF && c0_ != ')') {
        if (isError_) {
            return;
        }
        // Buffer position where the bytecode of the current term begins.
        size_t atomBcStart = buffer_.GetSize();
        int captureIndex = 0;
        bool isAtom = false;
        switch (c0_) {
            case '^': {
                // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                PrintF("Assertion %c line start \n", c0_);
                LineStartOpCode lineStartOp;
                lineStartOp.EmitOpCode(&buffer_, 0);
                Advance();
                break;
            }
            case '$': {
                // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                PrintF("Assertion %c line end \n", c0_);
                LineEndOpCode lineEndOp;
                lineEndOp.EmitOpCode(&buffer_, 0);
                Advance();
                break;
            }
            case '\\': {
                // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                PrintF("Escape %c \n", c0_);
                Advance();
                switch (c0_) {
                    case 'b': {
                        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                        PrintF("Assertion %c \n", c0_);
                        WordBoundaryOpCode wordBoundaryOp;
                        wordBoundaryOp.EmitOpCode(&buffer_, 0);
                        Advance();
                        break;
                    }
                    case 'B': {
                        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                        PrintF("Assertion %c \n", c0_);
                        NotWordBoundaryOpCode notWordBoundaryOp;
                        notWordBoundaryOp.EmitOpCode(&buffer_, 0);
                        Advance();
                        break;
                    }
                    default: {
                        isAtom = true;
                        // -1 means nothing is left for this function to emit as a character.
                        int atomValue = ParseAtomEscape(isBackward);
                        if (atomValue != -1) {
                            PrevOpCode prevOp;
                            if (isBackward) {
                                prevOp.EmitOpCode(&buffer_, 0);
                            }
                            if (IsIgnoreCase()) {
                                if (!IsUtf16()) {
                                    atomValue = Canonicalize(atomValue, false);
                                } else {
                                    // Expand the character to its case-insensitive closure and
                                    // emit that closure as a range instead of a single CHAR.
                                    icu::UnicodeSet set(atomValue, atomValue);
                                    set.closeOver(USET_CASE_INSENSITIVE);
                                    set.removeAllStrings();
                                    uint32_t size = static_cast<uint32_t>(set.size());
                                    RangeOpCode rangeOp;
                                    RangeSet rangeResult;
                                    for (uint32_t idx = 0; idx < size; idx++) {
                                        int32_t uc = set.charAt(idx);
                                        RangeSet curRange(uc);
                                        rangeResult.Insert(curRange);
                                    }
                                    rangeOp.InsertOpCode(&buffer_, rangeResult);
                                    // NOTE(review): this break leaves the inner switch before the
                                    // trailing PREV below is emitted, so in backward mode this path
                                    // emits only the leading PREV - confirm that is intended.
                                    // NOTE(review): RangeOpCode is used here regardless of the code
                                    // points in the set, whereas the '[' case picks Range32OpCode
                                    // when HighestValue() exceeds UINT16_MAX - confirm.
                                    break;
                                }
                            }
                            if (atomValue <= UINT16_MAX) {
                                CharOpCode charOp;
                                charOp.EmitOpCode(&buffer_, atomValue);
                            } else {
                                Char32OpCode charOp;
                                charOp.EmitOpCode(&buffer_, atomValue);
                            }
                            if (isBackward) {
                                prevOp.EmitOpCode(&buffer_, 0);
                            }
                        }
                        break;
                    }
                }
                break;
            }
            case '(': {
                Advance();
                isAtom = ParseAssertionCapture(&captureIndex, isBackward);
                // Step over the character ParseAssertionCapture stopped at (')' on success).
                // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                Advance();
                break;
            }
            case '.': {
                PrevOpCode prevOp;
                if (isBackward) {
                    prevOp.EmitOpCode(&buffer_, 0);
                }
                if (IsDotAll()) {
                    AllOpCode allOp;
                    allOp.EmitOpCode(&buffer_, 0);
                } else {
                    DotsOpCode dotsOp;
                    dotsOp.EmitOpCode(&buffer_, 0);
                }
                if (isBackward) {
                    prevOp.EmitOpCode(&buffer_, 0);
                }
                // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                PrintF("Atom %c match any \n", c0_);
                isAtom = true;
                Advance();
                break;
            }
            case '[': {
                // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                PrintF("Atom %c match range \n", c0_);
                isAtom = true;
                PrevOpCode prevOp;
                Advance();
                if (isBackward) {
                    prevOp.EmitOpCode(&buffer_, 0);
                }
                bool isInvert = false;
                if (c0_ == '^') {
                    isInvert = true;
                    Advance();
                }
                RangeSet rangeResult;
                if (!ParseClassRanges(&rangeResult)) {
                    break;
                }
                if (isInvert) {
                    rangeResult.Invert(IsUtf16());
                }
                // Pick the 16-bit or 32-bit range opcode from the largest code point in the set.
                uint32_t highValue = rangeResult.HighestValue();
                if (highValue <= UINT16_MAX) {
                    RangeOpCode rangeOp;
                    rangeOp.InsertOpCode(&buffer_, rangeResult);
                } else {
                    Range32OpCode rangeOp;
                    rangeOp.InsertOpCode(&buffer_, rangeResult);
                }

                if (isBackward) {
                    prevOp.EmitOpCode(&buffer_, 0);
                }
                break;
            }
            case '*':
            case '+':
            case '?':
                // A quantifier with no preceding atom.
                // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                ParseError("nothing to repeat");
                return;
            case '{': {
                // A well-formed interval quantifier here has nothing to repeat. Anything else
                // is rewound to the '{' and handled by the cases below.
                uint8_t *begin = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
                int dummy;
                if (ParserIntervalQuantifier(&dummy, &dummy)) {
                    ParseError("nothing to repeat");
                    return;
                }
                pc_ = begin;
                Advance();
            }
                [[fallthrough]];
            case '}':
            case ']':
                // A lone brace or bracket is an error when IsUtf16() is true; otherwise it is
                // treated as a literal pattern character below.
                if (IsUtf16()) {
                    ParseError("syntax error");
                    return;
                }
                [[fallthrough]];
            default: {
                // PatternCharacter
                // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                PrintF("PatternCharacter %c\n", c0_);
                isAtom = true;
                {
                    PrevOpCode prevOp;
                    if (isBackward) {
                        prevOp.EmitOpCode(&buffer_, 0);
                    }
                    uint32_t matchedChar = c0_;
                    if (c0_ > (INT8_MAX + 1)) {
                        // Step back one character, decode a full UTF-8 sequence from pc_ and
                        // move pc_ past the bytes that were consumed.
                        Prev();
                        UChar32 c;
                        int32_t length = end_ - pc_ + 1;
                        // NOLINTNEXTLINE(hicpp-signed-bitwise)
                        auto unicodeChar = base::utf_helper::ConvertUtf8ToUnicodeChar(pc_, length);
                        c = unicodeChar.first;
                        matchedChar = static_cast<uint32_t>(c);
                        pc_ += unicodeChar.second;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
                    }
                    if (IsIgnoreCase()) {
                        matchedChar = static_cast<uint32_t>(Canonicalize(static_cast<int>(matchedChar), IsUtf16()));
                    }
                    if (matchedChar > UINT16_MAX) {
                        Char32OpCode charOp;
                        charOp.EmitOpCode(&buffer_, matchedChar);
                    } else {
                        CharOpCode charOp;
                        charOp.EmitOpCode(&buffer_, matchedChar);
                    }
                    if (isBackward) {
                        prevOp.EmitOpCode(&buffer_, 0);
                    }
                }
                Advance();
                break;
            }
        }
        if (isAtom && !isError_) {
            // captureIndex is the group opened by this term (0 if none); captureCount_ - 1 is
            // the last capture allocated so far.
            ParseQuantifier(atomBcStart, captureIndex, captureCount_ - 1);
        }
        if (isBackward) {
            // Rotate the term just emitted to the front of this alternative:
            //   1. grow the buffer by the term's size,
            //   2. shift the whole alternative [start, end) right by termSize, which places the
            //      term's bytes at [end, end + termSize),
            //   3. copy the term from there into the gap opened at `start`.
            size_t end = buffer_.GetSize();
            size_t termSize = end - atomBcStart;
            size_t moveSize = end - start;
            buffer_.Expand(end + termSize);
            if (memmove_s(buffer_.buf_ + start +  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
                              termSize,           // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
                          moveSize,
                          buffer_.buf_ + start,  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
                          moveSize) != EOK) {
                LOG_FULL(FATAL) << "memmove_s failed";
                UNREACHABLE();
            }
            // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
            if (memcpy_s(buffer_.buf_ + start, termSize, buffer_.buf_ + end, termSize) != EOK) {
                LOG_FULL(FATAL) << "memcpy_s failed";
                UNREACHABLE();
            }
        }
    }
}
567
FindGroupName(const CString & name)568 int RegExpParser::FindGroupName(const CString &name)
569 {
570 size_t len = 0;
571 size_t nameLen = name.size();
572 const char *p = reinterpret_cast<char *>(groupNames_.buf_);
573 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
574 const char *bufEnd = reinterpret_cast<char *>(groupNames_.buf_) + groupNames_.size_;
575 int captureIndex = 1;
576 while (p < bufEnd) {
577 len = strlen(p);
578 if (len == nameLen && memcmp(name.c_str(), p, nameLen) == 0) {
579 return captureIndex;
580 }
581 p += len + 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
582 captureIndex++;
583 }
584 return -1;
585 }
586
// Parses the inside of a parenthesised construct; the opening '(' has already been consumed.
//
// Handled forms:
//   (?=...)  (?!...)      look-ahead; the body is parsed with the caller's isBackward
//   (?<=...) (?<!...)     look-behind; the body is parsed with isBackward = true
//   (?<name>...)          named capturing group
//   (?:...)               non-capturing group
//   (...)                 capturing group
//
// captureIndex: set to the index of the capture opened here (capturing forms only).
// isBackward:   direction used for the body of every form except look-behind.
// Returns true when the construct is an atom (capturing or non-capturing group). Returns false
// for look-around assertions and on any error, in which case ParseError has been called or
// isError_ was already set. On success c0_ is the closing ')', which is left for the caller to
// consume.
bool RegExpParser::ParseAssertionCapture(int *captureIndex, bool isBackward)
{
    bool isAtom = false;
    do {
        if (c0_ == '?') {
            Advance();
            switch (c0_) {
                // (?=Disjunction[?U, ?N])
                case '=': {
                    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                    PrintF("Assertion(?= Disjunction)\n");
                    Advance();
                    uint32_t start = buffer_.size_;
                    ParseDisjunction(isBackward);
                    // The body is closed with MATCH and then wrapped by a MATCH_AHEAD op that is
                    // inserted in front of it with the body length as operand.
                    MatchOpCode matchOp;
                    matchOp.EmitOpCode(&buffer_, 0);
                    MatchAheadOpCode matchAheadOp;
                    uint32_t len = buffer_.size_ - start;
                    matchAheadOp.InsertOpCode(&buffer_, start, len);
                    break;
                }
                // (?!Disjunction[?U, ?N])
                case '!': {
                    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                    PrintF("Assertion(?! Disjunction)\n");
                    uint32_t start = buffer_.size_;
                    Advance();
                    ParseDisjunction(isBackward);
                    MatchOpCode matchOp;
                    matchOp.EmitOpCode(&buffer_, 0);
                    NegativeMatchAheadOpCode matchAheadOp;
                    uint32_t len = buffer_.size_ - start;
                    matchAheadOp.InsertOpCode(&buffer_, start, len);
                    break;
                }
                case '<': {
                    Advance();
                    // (?<=Disjunction[?U, ?N])
                    if (c0_ == '=') {
                        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                        PrintF("Assertion(?<= Disjunction)\n");
                        Advance();
                        uint32_t start = buffer_.size_;
                        // Look-behind uses the same wrapper ops as look-ahead; only the body is
                        // parsed in backward mode.
                        ParseDisjunction(true);
                        MatchOpCode matchOp;
                        matchOp.EmitOpCode(&buffer_, 0);
                        MatchAheadOpCode matchAheadOp;
                        uint32_t len = buffer_.size_ - start;
                        matchAheadOp.InsertOpCode(&buffer_, start, len);
                        // (?<!Disjunction[?U, ?N])
                    } else if (c0_ == '!') {
                        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                        PrintF("Assertion(?<! Disjunction)\n");
                        Advance();
                        uint32_t start = buffer_.size_;
                        ParseDisjunction(true);
                        MatchOpCode matchOp;
                        matchOp.EmitOpCode(&buffer_, 0);
                        NegativeMatchAheadOpCode matchAheadOp;
                        uint32_t len = buffer_.size_ - start;
                        matchAheadOp.InsertOpCode(&buffer_, start, len);
                    } else {
                        // (?<name>Disjunction): step back one character and let
                        // ParseGroupSpecifier read the name directly through pc_.
                        Prev();
                        CString name;
                        auto **pp = const_cast<const uint8_t **>(&pc_);
                        if (!ParseGroupSpecifier(pp, name)) {
                            ParseError("GroupName Syntax error.");
                            return false;
                        }
                        if (FindGroupName(name) > 0) {
                            ParseError("Duplicate GroupName error.");
                            return false;
                        }
                        groupNames_.EmitStr(name.c_str());
                        newGroupNames_.push_back(name);
                        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                        PrintF("group name %s", name.c_str());
                        Advance();
                        // Continue with the capturing-group code below; the name entry has
                        // already been recorded, so the EmitChar(0) there is skipped.
                        goto parseCapture;  // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto)
                    }
                    break;
                }
                // (?:Disjunction[?U, ?N])
                case ':':
                    // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
                    PrintF("Atom(?<: Disjunction)\n");
                    isAtom = true;
                    Advance();
                    ParseDisjunction(isBackward);
                    break;
                default:
                    Advance();
                    ParseError("? Syntax error.");
                    return false;
            }
            if (isError_) {
                return false;
            }
        } else {
            // Unnamed capturing group: record an empty name so that every capturing group has
            // one entry in groupNames_ (FindGroupName returns the 1-based entry position).
            groupNames_.EmitChar(0);
        parseCapture:
            isAtom = true;
            *captureIndex = captureCount_++;
            SaveEndOpCode saveEndOp;
            SaveStartOpCode saveStartOp;
            // In backward mode the two save ops are emitted in swapped order: SAVE_END before
            // the body and SAVE_START after it.
            if (isBackward) {
                saveEndOp.EmitOpCode(&buffer_, *captureIndex);
            } else {
                saveStartOp.EmitOpCode(&buffer_, *captureIndex);
            }
            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
            PrintF("capture start %d \n", *captureIndex);
            ParseDisjunction(isBackward);
            if (isError_) {
                return false;
            }
            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
            PrintF("capture end %d \n", *captureIndex);
            if (isBackward) {
                saveStartOp.EmitOpCode(&buffer_, *captureIndex);
            } else {
                saveEndOp.EmitOpCode(&buffer_, *captureIndex);
            }
        }
    } while (c0_ != ')' && c0_ != KEY_EOF);
    if (c0_ != ')') {
        ParseError("capture syntax error");
        return false;
    }
    return isAtom;
}
718
ParseDecimalDigits()719 int RegExpParser::ParseDecimalDigits()
720 {
721 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
722 PrintF("Parse DecimalDigits------\n");
723 uint32_t result = 0;
724 bool overflow = false;
725 while (true) {
726 if (c0_ < '0' || c0_ > '9') {
727 break;
728 }
729 if (!overflow) {
730 if (UNLIKELY(result > (INT32_MAX - c0_ + '0') / DECIMAL_DIGITS_ADVANCE)) {
731 overflow = true;
732 } else {
733 result = result * DECIMAL_DIGITS_ADVANCE + c0_ - '0';
734 }
735 }
736 Advance();
737 }
738 if (overflow) {
739 return INT32_MAX;
740 }
741 return result;
742 }
743
ParserIntervalQuantifier(int * pmin,int * pmax)744 bool RegExpParser::ParserIntervalQuantifier(int *pmin, int *pmax)
745 {
746 // Quantifier::
747 // QuantifierPrefix
748 // QuantifierPrefix?
749 // QuantifierPrefix::
750 // *
751 // +
752 // ?
753 // {DecimalDigits}
754 // {DecimalDigits,}
755 // {DecimalDigits,DecimalDigits}
756 Advance();
757 *pmin = ParseDecimalDigits();
758 *pmax = *pmin;
759 switch (c0_) {
760 case ',': {
761 Advance();
762 if (c0_ == '}') {
763 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
764 PrintF("QuantifierPrefix{DecimalDigits,}\n");
765 *pmax = INT32_MAX;
766 Advance();
767 } else {
768 *pmax = ParseDecimalDigits();
769 if (c0_ == '}') {
770 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
771 PrintF("QuantifierPrefix{DecimalDigits,DecimalDigits}\n");
772 Advance();
773 } else {
774 return false;
775 }
776 }
777 break;
778 }
779 case '}':
780 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
781 PrintF("QuantifierPrefix{DecimalDigits}\n");
782 Advance();
783 break;
784 default:
785 Advance();
786 return false;
787 }
788 return true;
789 }
790
// Parses an optional quantifier (*, +, ?, {n}, {n,}, {n,m}, each optionally followed by '?' for
// non-greedy) after an atom and wraps the atom's bytecode accordingly.
//
// atomBcStart:  buffer offset where the atom's bytecode begins.
// captureStart: first capture index belonging to the atom; 0 means no SAVE_RESET is inserted.
// captureEnd:   last capture index passed to SAVE_RESET.
//
// When no quantifier follows, min and max stay -1 and no bytecode is changed. A '{' that does
// not form a valid interval is rewound and left for the caller. isEmpty_ is cleared on every
// path that reaches the end of the function.
void RegExpParser::ParseQuantifier(size_t atomBcStart, int captureStart, int captureEnd)
{
    int min = -1;
    int max = -1;
    bool isGreedy = true;
    switch (c0_) {
        case '*':
            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
            PrintF("QuantifierPrefix %c\n", c0_);
            min = 0;
            max = INT32_MAX;
            Advance();
            break;
        case '+':
            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
            PrintF("QuantifierPrefix %c\n", c0_);
            min = 1;
            max = INT32_MAX;
            Advance();
            break;
        case '?':
            // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
            PrintF("QuantifierPrefix %c\n", c0_);
            Advance();
            min = 0;
            max = 1;
            break;
        case '{': {
            uint8_t *start = pc_ - 1;  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
            if (!ParserIntervalQuantifier(&min, &max)) {
                pc_ = start;
                Advance();  // back to '{'
                return;
            }
            if (min > max) {
                ParseError("Invalid repetition count");
                return;
            }
            break;
        }
        default:
            break;
    }
    if (c0_ == '?') {
        isGreedy = false;
        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
        PrintF("Quantifier::QuantifierPrefix?\n");
        Advance();
        // NOTE(review): the `c0_ == '?'` test below can never be true in this else branch.
    } else if (c0_ == '?' || c0_ == '+' || c0_ == '*' || c0_ == '{') {
        ParseError("nothing to repeat");
        return;
    }

    if (max == 0) {
        buffer_.size_ = atomBcStart;  // Drop all unnecessary bytecode
    } else if (min != -1 && max != -1 && !isEmpty_) {
        bool isLoopOp = false;
        // Buffer offset of the CHECK_CHAR operand to patch later; SIZE_MAX means none was emitted.
        size_t checkCharPara = SIZE_MAX;

        if (captureStart != 0) {
            SaveResetOpCode saveResetOp;
            saveResetOp.InsertOpCode(&buffer_, atomBcStart, captureStart, captureEnd);
        }

        // zero advance check
        // For an unbounded repetition whose body does not start with one of the single-character
        // opcodes listed below, the body is bracketed by PUSH_CHAR ... CHECK_CHAR.
        uint8_t firstOp = buffer_.GetU8(atomBcStart);
        if (max == INT32_MAX && firstOp != RegExpOpCode::OP_CHAR && firstOp != RegExpOpCode::OP_CHAR32 &&
            firstOp != RegExpOpCode::OP_RANGE && firstOp != RegExpOpCode::OP_RANGE32 &&
            firstOp != RegExpOpCode::OP_ALL && firstOp != RegExpOpCode::OP_DOTS &&
            firstOp != RegExpOpCode::OP_SPARSE) {
            stackCount_++;
            PushCharOpCode pushCharOp;
            pushCharOp.InsertOpCode(&buffer_, atomBcStart);
            CheckCharOpCode checkCharOp;
            checkCharPara = buffer_.GetSize() + 1;
            // NOLINTNEXTLINE(readability-magic-numbers)
            checkCharOp.EmitOpCode(&buffer_, 0);
        }

        if (min <= 1 && max == INT32_MAX) {
            // '*' and '+' style repetition: a split op appended after the body jumps back to
            // atomBcStart. The CHECK_CHAR operand is set to the size of that split op.
            if (checkCharPara != SIZE_MAX) {
                buffer_.PutU32(checkCharPara, RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_SPLIT_NEXT)->GetSize());
            }
            // The operand is atomBcStart minus the position just past the split op, i.e. a
            // backward offset computed in unsigned arithmetic.
            if (isGreedy) {
                SplitFirstOpCode splitOp;
                splitOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - splitOp.GetSize());
            } else {
                SplitNextOpCode splitOp;
                splitOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - splitOp.GetSize());
            }
        } else if (max > 1) {
            // Counted repetition: a loop op carrying min and max is appended after the body.
            if (checkCharPara != SIZE_MAX) {
                buffer_.PutU32(checkCharPara, RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_LOOP)->GetSize());
            }
            if (isGreedy) {
                LoopGreedyOpCode loopOp;
                loopOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - loopOp.GetSize(), min, max);
                isLoopOp = true;
            } else {
                LoopOpCode loopOp;
                loopOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - loopOp.GetSize(), min, max);
                isLoopOp = true;
            }
        }

        if (min == 0) {
            // Zero iterations allowed: insert a split in front of everything emitted so far
            // whose operand is the length to skip.
            if (isGreedy) {
                SplitNextOpCode splitNextOp;
                splitNextOp.InsertOpCode(&buffer_, atomBcStart, buffer_.GetSize() - atomBcStart);
            } else {
                SplitFirstOpCode splitFirstOp;
                splitFirstOp.InsertOpCode(&buffer_, atomBcStart, buffer_.GetSize() - atomBcStart);
            }
        }
        if (isLoopOp) {
            // A loop op is bracketed by PUSH ... POP and takes one more stack slot.
            stackCount_++;
            PushOpCode pushOp;
            pushOp.InsertOpCode(&buffer_, atomBcStart);
            PopOpCode popOp;
            popOp.EmitOpCode(&buffer_);
        }
    }
    isEmpty_ = false;
}
915
ParseGroupSpecifier(const uint8_t ** pp,CString & name)916 bool RegExpParser::ParseGroupSpecifier(const uint8_t **pp, CString &name)
917 {
918 const uint8_t *p = *pp;
919 uint32_t c = 0;
920 char buffer[CACHE_SIZE] = {0};
921 char *q = buffer;
922 while (true) {
923 if (p <= end_) {
924 c = *p;
925 } else {
926 c = KEY_EOF;
927 }
928 if (c == '\\') {
929 p++;
930 if (*p != 'u') {
931 return false;
932 }
933 if (!ParseUnicodeEscape(&c)) {
934 return false;
935 }
936 } else if (c == '>') {
937 break;
938 } else if (c > CACHE_SIZE && c != KEY_EOF) {
939 c = static_cast<uint32_t>(base::StringHelper::UnicodeFromUtf8(p, UTF8_CHAR_LEN_MAX, &p));
940 } else if (c != KEY_EOF) {
941 p++;
942 } else {
943 return false;
944 }
945 if (q == buffer) {
946 if (!IsIdentFirst(c)) {
947 return false;
948 }
949 } else {
950 if (!u_isIDPart(c)) {
951 return false;
952 }
953 }
954 if (q != nullptr) {
955 *q++ = c;
956 }
957 } // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
958 p++;
959 *pp = p;
960 name = buffer;
961 return true;
962 }
963
ParseCaptureCount(const char * groupName)964 int RegExpParser::ParseCaptureCount(const char *groupName)
965 {
966 const uint8_t *p = nullptr;
967 int captureIndex = 1;
968 CString name;
969 hasNamedCaptures_ = 0;
970 for (p = base_; p < end_; p++) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
971 switch (*p) {
972 case '(': {
973 if (p[1] == '?') { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
974 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
975 if (p[CAPTURE_CONUT_ADVANCE - 1] == '<' && p[CAPTURE_CONUT_ADVANCE] != '!' &&
976 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
977 p[CAPTURE_CONUT_ADVANCE] != '=') {
978 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
979 hasNamedCaptures_ = 1;
980 p += CAPTURE_CONUT_ADVANCE;
981 if (groupName != nullptr) {
982 if (ParseGroupSpecifier(&p, name)) {
983 if (strcmp(name.c_str(), groupName) == 0) {
984 return captureIndex;
985 }
986 }
987 }
988 captureIndex++;
989 }
990 } else {
991 captureIndex++;
992 }
993 break;
994 }
995 case '\\':
996 p++; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
997 break;
998 case '[': {
999 while (p < end_ && *p != ']') {
1000 if (*p == '\\') {
1001 p++; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1002 }
1003 p++; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1004 }
1005 break;
1006 }
1007 default:
1008 break;
1009 }
1010 }
1011 return captureIndex;
1012 }
1013
1014 // NOLINTNEXTLINE(readability-function-size)
ParseAtomEscape(bool isBackward)1015 int RegExpParser::ParseAtomEscape(bool isBackward)
1016 {
1017 // AtomEscape[U, N]::
1018 // DecimalEscape
1019 // CharacterClassEscape[?U]
1020 // CharacterEscape[?U]
1021 // [+N]kGroupName[?U]
1022 int result = -1;
1023 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1024 PrintF("Parse AtomEscape------\n");
1025 PrevOpCode prevOp;
1026 switch (c0_) {
1027 case KEY_EOF:
1028 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1029 ParseError("unexpected end");
1030 break;
1031 // DecimalEscape
1032 case '1':
1033 case '2':
1034 case '3':
1035 case '4':
1036 case '5':
1037 case '6':
1038 case '7':
1039 case '8':
1040 case '9': {
1041 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1042 PrintF("NonZeroDigit %c\n", c0_);
1043 int capture = ParseDecimalDigits();
1044 if (capture > captureCount_ - 1 && capture > ParseCaptureCount(nullptr) - 1) {
1045 ParseError("invalid backreference count");
1046 break;
1047 }
1048 if (isBackward) {
1049 BackwardBackReferenceOpCode backReferenceOp;
1050 backReferenceOp.EmitOpCode(&buffer_, capture);
1051 } else {
1052 BackReferenceOpCode backReferenceOp;
1053 backReferenceOp.EmitOpCode(&buffer_, capture);
1054 }
1055 break;
1056 }
1057 // CharacterClassEscape
1058 case 'd': {
1059 // [0-9]
1060 RangeOpCode rangeOp;
1061 if (isBackward) {
1062 prevOp.EmitOpCode(&buffer_, 0);
1063 }
1064 rangeOp.InsertOpCode(&buffer_, g_rangeD);
1065 goto parseLookBehind;
1066 }
1067 case 'D': {
1068 // [^0-9]
1069 RangeSet atomRange(g_rangeD);
1070 atomRange.Invert(IsUtf16());
1071 Range32OpCode rangeOp;
1072 if (isBackward) {
1073 prevOp.EmitOpCode(&buffer_, 0);
1074 }
1075 rangeOp.InsertOpCode(&buffer_, atomRange);
1076 goto parseLookBehind;
1077 }
1078 case 's': {
1079 // [\f\n\r\t\v]
1080 RangeOpCode rangeOp;
1081 if (isBackward) {
1082 prevOp.EmitOpCode(&buffer_, 0);
1083 }
1084 rangeOp.InsertOpCode(&buffer_, g_rangeS);
1085 goto parseLookBehind;
1086 }
1087 case 'S': {
1088 RangeSet atomRange(g_rangeS);
1089 Range32OpCode rangeOp;
1090 atomRange.Invert(IsUtf16());
1091 if (isBackward) {
1092 prevOp.EmitOpCode(&buffer_, 0);
1093 }
1094 rangeOp.InsertOpCode(&buffer_, atomRange);
1095 goto parseLookBehind;
1096 }
1097 case 'w': {
1098 // [A-Za-z0-9]
1099 RangeOpCode rangeOp;
1100 if (isBackward) {
1101 prevOp.EmitOpCode(&buffer_, 0);
1102 }
1103 rangeOp.InsertOpCode(&buffer_, g_rangeW);
1104 goto parseLookBehind;
1105 }
1106 case 'W': {
1107 // [^A-Za-z0-9]
1108 RangeSet atomRange(g_rangeW);
1109 atomRange.Invert(IsUtf16());
1110 Range32OpCode rangeOp;
1111 if (isBackward) {
1112 prevOp.EmitOpCode(&buffer_, 0);
1113 }
1114 rangeOp.InsertOpCode(&buffer_, atomRange);
1115 goto parseLookBehind;
1116 }
1117 case 'P':
1118 case 'p': {
1119 //CharacterClassStrings
1120 RangeSet atomRange;
1121 Range32OpCode rangeOp;
1122 ParseClassEscape(&atomRange);
1123 if (isBackward) {
1124 prevOp.EmitOpCode(&buffer_, 0);
1125 }
1126 rangeOp.InsertOpCode(&buffer_, atomRange);
1127 break;
1128 }
1129 // [+N]kGroupName[?U]
1130 case 'k': {
1131 Advance();
1132 if (c0_ != '<') {
1133 if (!IsUtf16() || HasNamedCaptures()) {
1134 ParseError("expecting group name.");
1135 break;
1136 }
1137 }
1138 Advance();
1139 Prev();
1140 CString name;
1141 auto **pp = const_cast<const uint8_t **>(&pc_);
1142 if (!ParseGroupSpecifier(pp, name)) {
1143 ParseError("GroupName Syntax error.");
1144 break;
1145 }
1146 int postion = FindGroupName(name);
1147 if (postion < 0) {
1148 postion = ParseCaptureCount(name.c_str());
1149 if (postion < 0 && (!IsUtf16() || HasNamedCaptures())) {
1150 ParseError("group name not defined");
1151 break;
1152 }
1153 }
1154 if (isBackward) {
1155 BackwardBackReferenceOpCode backReferenceOp;
1156 backReferenceOp.EmitOpCode(&buffer_, postion);
1157 } else {
1158 BackReferenceOpCode backReferenceOp;
1159 backReferenceOp.EmitOpCode(&buffer_, postion);
1160 }
1161 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1162 Advance();
1163 break;
1164 }
1165 parseLookBehind: {
1166 if (isBackward) {
1167 prevOp.EmitOpCode(&buffer_, 0);
1168 }
1169 Advance();
1170 break;
1171 }
1172 default:
1173 result = ParseCharacterEscape();
1174 break;
1175 }
1176 return result;
1177 }
1178
RecountCaptures()1179 int RegExpParser::RecountCaptures()
1180 {
1181 if (totalCaptureCount_ < 0) {
1182 const char *name = reinterpret_cast<const char*>(groupNames_.buf_);
1183 totalCaptureCount_ = ParseCaptureCount(name);
1184 }
1185 return totalCaptureCount_;
1186 }
HasNamedCaptures()1187 bool RegExpParser::HasNamedCaptures()
1188 {
1189 if (hasNamedCaptures_ < 0) {
1190 RecountCaptures();
1191 }
1192 return false;
1193 }
1194
ParseCharacterEscape()1195 int RegExpParser::ParseCharacterEscape()
1196 {
1197 // CharacterEscape[U]::
1198 // ControlEscape
1199 // c ControlLetter
1200 // 0 [lookahead ? DecimalDigit]
1201 // HexEscapeSequence
1202 // RegExpUnicodeEscapeSequence[?U]
1203 // IdentityEscape[?U]
1204 uint32_t result = 0;
1205 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1206 switch (c0_) {
1207 // ControlEscape
1208 case 'f':
1209 result = '\f';
1210 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1211 PrintF("ControlEscape %c\n", c0_);
1212 Advance();
1213 break;
1214 case 'n':
1215 result = '\n';
1216 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1217 PrintF("ControlEscape %c\n", c0_);
1218 Advance();
1219 break;
1220 case 'r':
1221 result = '\r';
1222 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1223 PrintF("ControlEscape %c\n", c0_);
1224 Advance();
1225 break;
1226 case 't':
1227 result = '\t';
1228 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1229 PrintF("ControlEscape %c\n", c0_);
1230 Advance();
1231 break;
1232 case 'v':
1233 result = '\v';
1234 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1235 PrintF("ControlEscape %c\n", c0_);
1236 Advance();
1237 break;
1238 // c ControlLetter
1239 case 'c': {
1240 Advance();
1241 if ((c0_ >= 'A' && c0_ <= 'Z') || (c0_ >= 'a' && c0_ <= 'z')) {
1242 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1243 PrintF("ControlLetter %c\n", c0_);
1244 result = static_cast<uint32_t>(c0_) & 0x1f; // NOLINTNEXTLINE(readability-magic-numbers)
1245 Advance();
1246 } else {
1247 if (!IsUtf16()) {
1248 pc_--; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1249 result = '\\';
1250 } else {
1251 ParseError("Invalid control letter");
1252 return -1;
1253 }
1254 }
1255 break;
1256 }
1257 case '0': {
1258 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1259 PrintF("CharacterEscape 0 [lookahead ? DecimalDigit]\n");
1260 if (IsUtf16() && !(*pc_ >= '0' && *pc_ <= '9')) { // NOLINTNEXTLINE(readability-magic-numbers)
1261 Advance();
1262 result = 0;
1263 break;
1264 }
1265 [[fallthrough]];
1266 }
1267 case '1':
1268 case '2':
1269 case '3':
1270 case '4':
1271 case '5':
1272 case '6':
1273 case '7': {
1274 if (IsUtf16()) {
1275 // With /u, decimal escape is not interpreted as octal character code.
1276 ParseError("Invalid class escape");
1277 return 0;
1278 }
1279 result = ParseOctalLiteral();
1280 break;
1281 }
1282 // ParseHexEscapeSequence
1283 // ParseRegExpUnicodeEscapeSequence
1284 case 'x': {
1285 Advance();
1286 if (ParseHexEscape(UNICODE_HEX_ADVANCE, &result)) {
1287 return result;
1288 }
1289 if (IsUtf16()) {
1290 ParseError("Invalid class escape");
1291 return -1;
1292 }
1293 result = 'x';
1294 break;
1295 }
1296 case 'u': {
1297 Advance();
1298 if (ParseUnicodeEscape(&result)) {
1299 return result;
1300 }
1301 if (IsUtf16()) {
1302 // With /u, invalid escapes are not treated as identity escapes.
1303 ParseError("Invalid unicode escape");
1304 return 0;
1305 }
1306 // If \u is not followed by a two-digit hexadecimal, treat it
1307 // as an identity escape.
1308 result = 'u';
1309 break;
1310 }
1311 // IdentityEscape[?U]
1312 case '$':
1313 case '(':
1314 case ')':
1315 case '*':
1316 case '+':
1317 case '.':
1318 case '/':
1319 case '?':
1320 case '[':
1321 case '\\':
1322 case ']':
1323 case '^':
1324 case '{':
1325 case '|':
1326 case '}':
1327 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1328 PrintF("IdentityEscape %c\n", c0_);
1329 result = c0_;
1330 Advance();
1331 break;
1332 default: {
1333 if (IsUtf16()) {
1334 ParseError("Invalid unicode escape");
1335 return 0;
1336 }
1337 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1338 PrintF("SourceCharacter %c\n", c0_);
1339 result = c0_;
1340 if (result < CHAR_MAXS) {
1341 Advance();
1342 } else {
1343 Prev();
1344 const uint8_t *p = pc_;
1345 result = static_cast<uint32_t>(base::StringHelper::UnicodeFromUtf8(p, UTF8_CHAR_LEN_MAX, &p));
1346 int offset = static_cast<int>(p - pc_);
1347 Advance(offset + 1);
1348 }
1349 break;
1350 }
1351 }
1352 return static_cast<int>(result);
1353 }
1354
ParseClassRanges(RangeSet * result)1355 bool RegExpParser::ParseClassRanges(RangeSet *result)
1356 {
1357 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1358 PrintF("Parse ClassRanges------\n");
1359 while (c0_ != ']') {
1360 RangeSet s1;
1361 bool needInter = false;
1362 uint32_t c1 = ParseClassAtom(&s1);
1363 if (c1 == UINT32_MAX) {
1364 ParseError("invalid class range");
1365 return false;
1366 }
1367 needInter = NeedIntersection(c1);
1368 int next_c0 = *pc_;
1369 if (c0_ == '-' && next_c0 != ']') {
1370 if (c1 == CLASS_RANGE_BASE) {
1371 if (IsUtf16()) {
1372 ParseError("invalid class range");
1373 return false;
1374 }
1375 result->Insert(s1);
1376 continue;
1377 }
1378 Advance();
1379 RangeSet s2;
1380 uint32_t c2 = ParseClassAtom(&s2);
1381 if (c2 == UINT32_MAX) {
1382 ParseError("invalid class range");
1383 return false;
1384 }
1385 if (c2 == CLASS_RANGE_BASE) {
1386 if (IsUtf16()) {
1387 ParseError("invalid class range");
1388 return false;
1389 }
1390 result->Insert(s2);
1391 continue;
1392 }
1393 if (c1 < INT8_MAX) {
1394 if (c1 > c2) {
1395 ParseError("invalid class range");
1396 return false;
1397 }
1398 }
1399 needInter = NeedIntersection(c2);
1400 result->Insert(c1, c2);
1401 if (IsIgnoreCase() && needInter) {
1402 ProcessIntersection(result);
1403 }
1404 } else {
1405 result->Insert(s1);
1406 if (!(IsIgnoreCase() && needInter)) {
1407 continue;
1408 }
1409 if (c1 <= 'z' && c1 >= 'a') {
1410 result->Insert(RangeSet(c1 - 'a' + 'A'));
1411 } else {
1412 result->Insert(RangeSet(c1 - 'A' + 'a'));
1413 }
1414 }
1415 }
1416 Advance();
1417 return true;
1418 }
1419
ParseClassAtom(RangeSet * atom)1420 uint32_t RegExpParser::ParseClassAtom(RangeSet *atom)
1421 {
1422 uint32_t ret = UINT32_MAX;
1423 switch (c0_) {
1424 case '\\': {
1425 Advance();
1426 ret = static_cast<uint32_t>(ParseClassEscape(atom));
1427 break;
1428 }
1429 case KEY_EOF:
1430 break;
1431 case 0: {
1432 if (pc_ >= end_) {
1433 return UINT32_MAX;
1434 }
1435 [[fallthrough]];
1436 }
1437 default: {
1438 uint32_t value = c0_;
1439 size_t u16_size = 0;
1440 if (c0_ > INT8_MAX) { // NOLINTNEXTLINE(readability-magic-numbers)
1441 pc_ -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1442 auto u16_result = base::utf_helper::ConvertUtf8ToUtf16Pair(pc_, true);
1443 value = u16_result.first;
1444 u16_size = u16_result.second;
1445 Advance(u16_size + 1);
1446 } else {
1447 Advance();
1448 }
1449 atom->Insert(RangeSet(value));
1450 ret = value;
1451 break;
1452 }
1453 }
1454 return ret;
1455 }
1456
ParseClassEscape(RangeSet * atom)1457 int RegExpParser::ParseClassEscape(RangeSet *atom)
1458 {
1459 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1460 PrintF("Parse ClassEscape------\n");
1461 int result = -1;
1462 switch (c0_) {
1463 case 'b':
1464 Advance();
1465 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1466 PrintF("ClassEscape %c", 'b');
1467 result = '\b';
1468 atom->Insert(RangeSet(static_cast<uint32_t>('\b')));
1469 break;
1470 case '-':
1471 Advance();
1472 result = '-';
1473 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1474 PrintF("ClassEscape %c", '-');
1475 atom->Insert(RangeSet(static_cast<uint32_t>('-')));
1476 break;
1477 // CharacterClassEscape
1478 case 'd':
1479 case 'D':
1480 result = CLASS_RANGE_BASE;
1481 atom->Insert(g_rangeD);
1482 if (c0_ == 'D') {
1483 atom->Invert(IsUtf16());
1484 }
1485 Advance();
1486 break;
1487 case 's':
1488 case 'S':
1489 result = CLASS_RANGE_BASE;
1490 atom->Insert(g_rangeS);
1491 if (c0_ == 'S') {
1492 atom->Invert(IsUtf16());
1493 }
1494 Advance();
1495 break;
1496 case 'w':
1497 case 'W':
1498 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1499 PrintF("ClassEscape::CharacterClassEscape %c\n", c0_);
1500 result = CLASS_RANGE_BASE;
1501 atom->Insert(g_rangeW);
1502 if (c0_ == 'W') {
1503 atom->Invert(IsUtf16());
1504 }
1505 Advance();
1506 break;
1507 case 'P':
1508 case 'p': {
1509 bool negate = (c0_ == 'P');
1510 CString propertyName;
1511 CString valueName;
1512 if (!ParseUnicodePropertyValueCharacters(propertyName, valueName) ||
1513 !ParseUnicodePropertyClassRange(propertyName, valueName, atom, negate)) {
1514 CString msg = "Invalid regular expression of unicode";
1515 ParseError(msg.c_str());
1516 }
1517 result = CLASS_RANGE_BASE;
1518 break;
1519 }
1520 default:
1521 result = ParseCharacterEscape();
1522 int value = result;
1523 if (IsIgnoreCase()) {
1524 value = Canonicalize(value, IsUtf16());
1525 }
1526 atom->Insert(RangeSet(static_cast<uint32_t>(value)));
1527 break;
1528 }
1529 return result;
1530 }
1531
ParseUnicodePropertyValueCharacters(CString & propertyName,CString & valueName)1532 bool RegExpParser::ParseUnicodePropertyValueCharacters(CString &propertyName, CString &valueName)
1533 {
1534 Advance();
1535 if (c0_ == '{') {
1536 if (!GetUnicodePropertyName(propertyName)) {
1537 return false;
1538 }
1539
1540 if (!GetUnicodePropertyValueName(valueName)) {
1541 return false;
1542 }
1543 } else {
1544 return false;
1545 }
1546 Advance();
1547 return true;
1548 }
1549
GetUnicodePropertyName(CString & propertyName)1550 bool RegExpParser::GetUnicodePropertyName(CString &propertyName)
1551 {
1552 Advance();
1553 while (c0_ != '}' && c0_ != '=') {
1554 if (IsUnicodePropertyValueCharacter(c0_)) {
1555 propertyName += c0_;
1556 } else {
1557 return false;
1558 }
1559 Advance();
1560 }
1561 return true;
1562 }
1563
GetUnicodePropertyValueName(CString & valueName)1564 bool RegExpParser::GetUnicodePropertyValueName(CString &valueName)
1565 {
1566 if (c0_ == '=') {
1567 Advance();
1568 while (c0_ != '}') {
1569 if (IsUnicodePropertyValueCharacter(c0_)) {
1570 valueName += c0_;
1571 } else {
1572 return false;
1573 }
1574 Advance();
1575 }
1576 }
1577 return true;
1578 }
1579
1580 // NOLINTNEXTLINE(cert-dcl50-cpp)
PrintF(const char * fmt,...)1581 void RegExpParser::PrintF(const char *fmt, ...)
1582 {
1583 #ifndef _NO_DEBUG_
1584 va_list args;
1585 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,)
1586 va_start(args, fmt);
1587 vprintf(fmt, args);
1588 va_end(args);
1589 #else
1590 (void)fmt;
1591 #endif
1592 }
1593
ParseError(const char * errorMessage)1594 void RegExpParser::ParseError(const char *errorMessage)
1595 {
1596 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1597 PrintF("error: ");
1598 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1599 PrintF(errorMessage);
1600 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1601 PrintF("\n");
1602 SetIsError();
1603 size_t length = strlen(errorMessage) + 1;
1604 if (memcpy_s(errorMsg_, length, errorMessage, length) != EOK) {
1605 LOG_FULL(FATAL) << "memcpy_s failed";
1606 UNREACHABLE();
1607 }
1608 }
1609
IsIdentFirst(uint32_t c)1610 int RegExpParser::IsIdentFirst(uint32_t c)
1611 {
1612 if (c < CACHE_SIZE) {
1613 return (ID_START_TABLE_ASCII[c >> 5] >> (c & 31)) & 1; // 5: Shift five bits 31: and operation binary of 31
1614 } else {
1615 auto uchar = static_cast<UChar32>(c);
1616 return static_cast<int>(u_isIDStart(uchar));
1617 }
1618 }
1619
Canonicalize(int c,bool isUnicode)1620 int RegExpParser::Canonicalize(int c, bool isUnicode)
1621 {
1622 if (c < TMP_BUF_SIZE) { // NOLINTNEXTLINE(readability-magic-numbers)
1623 if (c >= 'a' && c <= 'z') {
1624 c = c - 'a' + 'A';
1625 }
1626 } else {
1627 int cur = c;
1628 if (isUnicode) {
1629 c = u_tolower(static_cast<UChar32>(c));
1630 if (c >= 'a' && c <= 'z') {
1631 c = cur;
1632 }
1633 } else {
1634 c = u_toupper(static_cast<UChar32>(c));
1635 if (c >= 'A' && c <= 'Z') {
1636 c = cur;
1637 }
1638 }
1639 }
1640 return c;
1641 }
1642
NeedIntersection(uint32_t c)1643 bool RegExpParser::NeedIntersection(uint32_t c)
1644 {
1645 return (c <= 'z' && c >= 'a') || (c <= 'Z' && c >= 'A');
1646 }
1647
DoParserStackOverflowCheck(const char * errorMessage)1648 void RegExpParser::DoParserStackOverflowCheck(const char *errorMessage)
1649 {
1650 if (UNLIKELY(thread_->GetCurrentStackPosition() < thread_->GetStackLimit())) {
1651 LOG_ECMA(ERROR) << "Stack overflow! current:" << thread_->GetCurrentStackPosition() <<
1652 " limit:" << thread_->GetStackLimit();
1653 ParseError(errorMessage);
1654 return;
1655 }
1656 }
1657
ParseUnicodePropertyClassRange(CString & propertyName,CString & valueName,RangeSet * atom,bool negate)1658 bool RegExpParser::ParseUnicodePropertyClassRange(CString &propertyName, CString &valueName,
1659 RangeSet *atom, bool negate)
1660 {
1661 const char *name = propertyName.c_str();
1662 if (valueName.size() == 0) {
1663 if (MatchUnicodeProperty(UCHAR_GENERAL_CATEGORY_MASK, name, atom, negate)) {
1664 return true;
1665 }
1666 if (MatchSepcialUnicodeProperty(propertyName, negate, atom)) {
1667 return true;
1668 }
1669 UProperty property = u_getPropertyEnum(name);
1670 if (!IsSupportedBinaryProperty(property)) {
1671 return false;
1672 }
1673 if (!IsExactPropertyAlias(name, property)) {
1674 return false;
1675 }
1676 if (negate && IsBinaryPropertyOfStrings(property)) {
1677 return false;
1678 }
1679 return MatchUnicodeProperty(property, negate ? "N" : "Y", atom, false);
1680 } else {
1681 UProperty property = u_getPropertyEnum(propertyName.c_str());
1682 if (property == UCHAR_GENERAL_CATEGORY) {
1683 property = UCHAR_GENERAL_CATEGORY_MASK;
1684 } else if (property != UCHAR_SCRIPT && property != UCHAR_SCRIPT_EXTENSIONS) {
1685 return false;
1686 }
1687 return MatchUnicodeProperty(property, valueName.c_str(), atom, negate);
1688 }
1689 }
1690
MatchUnicodeProperty(UProperty property,const char * propertyName,RangeSet * atom,bool negate)1691 bool RegExpParser::MatchUnicodeProperty(UProperty property, const char* propertyName, RangeSet *atom, bool negate)
1692 {
1693 UProperty propertyForMatch = property;
1694 if (propertyForMatch == UCHAR_SCRIPT_EXTENSIONS) {
1695 propertyForMatch = UCHAR_SCRIPT;
1696 }
1697 int32_t propertyValue = u_getPropertyValueEnum(propertyForMatch, propertyName);
1698 if (propertyValue == UCHAR_INVALID_CODE) {
1699 return false;
1700 }
1701 if (!IsExactPropertyValueAlis(propertyName, propertyForMatch, propertyValue)) {
1702 return false;
1703 }
1704 UErrorCode ec = U_ZERO_ERROR;
1705 icu::UnicodeSet set;
1706 set.applyIntPropertyValue(property, propertyValue, ec);
1707 bool success = ec == U_ZERO_ERROR && !set.isEmpty();
1708 if (success) {
1709 const bool caseFolding = IsIgnoreCase();
1710 if (negate) {
1711 set.complement();
1712 }
1713 if (caseFolding) {
1714 set.closeOver(USET_CASE_INSENSITIVE);
1715 }
1716 set.removeAllStrings();
1717 for (int i = 0; i < set.getRangeCount(); i++) {
1718 atom->Insert(set.getRangeStart(i), set.getRangeEnd(i));
1719 }
1720 }
1721 return success;
1722 }
1723
IsExactPropertyValueAlis(const char * valueName,UProperty property,int32_t propertyValue)1724 bool RegExpParser::IsExactPropertyValueAlis(const char *valueName, UProperty property, int32_t propertyValue)
1725 {
1726 const char *shortName = u_getPropertyValueName(property, propertyValue, U_SHORT_PROPERTY_NAME);
1727 if (shortName != nullptr && strcmp(valueName, shortName) == 0) {
1728 return true;
1729 }
1730 int i = 0;
1731 bool flag = true;
1732 while (flag) {
1733 const char *longName = u_getPropertyValueName(property, propertyValue,
1734 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
1735 if (longName == nullptr) {
1736 flag = false;
1737 break;
1738 }
1739 if (strcmp(valueName, longName) == 0) {
1740 return true;
1741 }
1742 i++;
1743 }
1744 return false;
1745 }
1746
IsExactPropertyAlias(const char * propertyName,UProperty property)1747 bool RegExpParser::IsExactPropertyAlias(const char* propertyName, UProperty property)
1748 {
1749 const char* shortName = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
1750 if (shortName != nullptr && strcmp(propertyName, shortName) == 0) {
1751 return true;
1752 }
1753 int i = 0;
1754 bool flag = true;
1755 while (flag) {
1756 const char* longName = u_getPropertyName(property,
1757 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
1758 if (longName == nullptr) {
1759 flag = false;
1760 break;
1761 }
1762 if (strcmp(propertyName, longName) == 0) {
1763 return true;
1764 }
1765 i++;
1766 }
1767 return false;
1768 }
1769
MatchSepcialUnicodeProperty(CString & name,bool negate,RangeSet * atom)1770 bool RegExpParser::MatchSepcialUnicodeProperty(CString &name, bool negate, RangeSet *atom)
1771 {
1772 if (name == "Any") {
1773 if (!negate) {
1774 atom->Insert(0, 0x10FFFF);
1775 }
1776 } else if (name == "ASCII") {
1777 if (negate) {
1778 atom->Insert(0x80, 0x10FFFF);
1779 } else {
1780 atom->Insert(0x0, 0x7F);
1781 }
1782 } else if (name == "Assigned") {
1783 return MatchUnicodeProperty(UCHAR_GENERAL_CATEGORY, "Unassigned", atom, !negate);
1784 } else {
1785 return false;
1786 }
1787 return true;
1788 }
1789
IsSupportedBinaryProperty(UProperty property)1790 bool RegExpParser::IsSupportedBinaryProperty(UProperty property)
1791 {
1792 switch (property) {
1793 case UCHAR_ALPHABETIC:
1794 case UCHAR_ASCII_HEX_DIGIT:
1795 case UCHAR_BIDI_CONTROL:
1796 case UCHAR_BIDI_MIRRORED:
1797 case UCHAR_DASH:
1798 case UCHAR_DEFAULT_IGNORABLE_CODE_POINT:
1799 case UCHAR_DEPRECATED:
1800 case UCHAR_DIACRITIC:
1801 case UCHAR_JOIN_CONTROL:
1802 case UCHAR_IDS_TRINARY_OPERATOR:
1803 case UCHAR_IDS_BINARY_OPERATOR:
1804 case UCHAR_IDEOGRAPHIC:
1805 case UCHAR_S_TERM:
1806 case UCHAR_ID_START:
1807 case UCHAR_ID_CONTINUE:
1808 case UCHAR_HEX_DIGIT:
1809 case UCHAR_GRAPHEME_EXTEND:
1810 case UCHAR_GRAPHEME_BASE:
1811 case UCHAR_EXTENDER:
1812 case UCHAR_LOGICAL_ORDER_EXCEPTION:
1813 case UCHAR_LOWERCASE:
1814 case UCHAR_MATH:
1815 case UCHAR_NONCHARACTER_CODE_POINT:
1816 case UCHAR_QUOTATION_MARK:
1817 case UCHAR_RADICAL:
1818 case UCHAR_SOFT_DOTTED:
1819 case UCHAR_TERMINAL_PUNCTUATION:
1820 case UCHAR_UNIFIED_IDEOGRAPH:
1821 case UCHAR_UPPERCASE:
1822 case UCHAR_WHITE_SPACE:
1823 case UCHAR_XID_CONTINUE:
1824 case UCHAR_XID_START:
1825 case UCHAR_VARIATION_SELECTOR:
1826 case UCHAR_PATTERN_SYNTAX:
1827 case UCHAR_PATTERN_WHITE_SPACE:
1828 case UCHAR_CASED:
1829 case UCHAR_CASE_IGNORABLE:
1830 case UCHAR_CHANGES_WHEN_LOWERCASED:
1831 case UCHAR_CHANGES_WHEN_UPPERCASED:
1832 case UCHAR_CHANGES_WHEN_TITLECASED:
1833 case UCHAR_CHANGES_WHEN_CASEFOLDED:
1834 case UCHAR_CHANGES_WHEN_CASEMAPPED:
1835 case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED:
1836 case UCHAR_REGIONAL_INDICATOR:
1837 case UCHAR_EMOJI:
1838 case UCHAR_EMOJI_PRESENTATION:
1839 case UCHAR_EMOJI_MODIFIER:
1840 case UCHAR_EMOJI_MODIFIER_BASE:
1841 case UCHAR_EMOJI_COMPONENT:
1842 case UCHAR_EXTENDED_PICTOGRAPHIC:
1843 return true;
1844 case UCHAR_BASIC_EMOJI:
1845 case UCHAR_EMOJI_KEYCAP_SEQUENCE:
1846 case UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE:
1847 case UCHAR_RGI_EMOJI_FLAG_SEQUENCE:
1848 case UCHAR_RGI_EMOJI_TAG_SEQUENCE:
1849 case UCHAR_RGI_EMOJI_ZWJ_SEQUENCE:
1850 case UCHAR_RGI_EMOJI:
1851 return false;
1852 default:
1853 break;
1854 }
1855 return false;
1856 }
1857
IsBinaryPropertyOfStrings(UProperty property)1858 bool RegExpParser::IsBinaryPropertyOfStrings(UProperty property)
1859 {
1860 switch (property) {
1861 case UCHAR_BASIC_EMOJI:
1862 case UCHAR_EMOJI_KEYCAP_SEQUENCE:
1863 case UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE:
1864 case UCHAR_RGI_EMOJI_FLAG_SEQUENCE:
1865 case UCHAR_RGI_EMOJI_TAG_SEQUENCE:
1866 case UCHAR_RGI_EMOJI_ZWJ_SEQUENCE:
1867 case UCHAR_RGI_EMOJI:
1868 return true;
1869 default:
1870 break;
1871 }
1872 return false;
1873 }
1874 } // namespace panda::ecmascript
1875