1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/regexp/regexp_parser.h"
17
18 #include "ecmascript/base/string_helper.h"
19 #include "ecmascript/ecma_macros.h"
20 #include "ecmascript/regexp/regexp_opcode.h"
21 #include "libpandabase/utils/utils.h"
22 #include "securec.h"
23 #include "unicode/uniset.h"
24
25 #define _NO_DEBUG_
26
27 namespace panda::ecmascript {
28 static RangeSet g_rangeD(0x30, 0x39); // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
29 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
30 static RangeSet g_rangeS({
31 std::pair<uint32_t, uint32_t>(0x0009, 0x000D), // NOLINTNEXTLINE(readability-magic-numbers)
32 std::pair<uint32_t, uint32_t>(0x0020, 0x0020), // NOLINTNEXTLINE(readability-magic-numbers)
33 std::pair<uint32_t, uint32_t>(0x00A0, 0x00A0), // NOLINTNEXTLINE(readability-magic-numbers)
34 std::pair<uint32_t, uint32_t>(0x1680, 0x1680), // NOLINTNEXTLINE(readability-magic-numbers)
35 std::pair<uint32_t, uint32_t>(0x2000, 0x200A), // NOLINTNEXTLINE(readability-magic-numbers)
36 /* 2028;LINE SEPARATOR;Zl;0;WS;;;;;N;;;;; */
37 /* 2029;PARAGRAPH SEPARATOR;Zp;0;B;;;;;N;;;;; */
38 std::pair<uint32_t, uint32_t>(0x2028, 0x2029), // NOLINTNEXTLINE(readability-magic-numbers)
39 std::pair<uint32_t, uint32_t>(0x202F, 0x202F), // NOLINTNEXTLINE(readability-magic-numbers)
40 std::pair<uint32_t, uint32_t>(0x205F, 0x205F), // NOLINTNEXTLINE(readability-magic-numbers)
41 std::pair<uint32_t, uint32_t>(0x3000, 0x3000), // NOLINTNEXTLINE(readability-magic-numbers)
42 /* FEFF;ZERO WIDTH NO-BREAK SPACE;Cf;0;BN;;;;;N;BYTE ORDER MARK;;;; */
43 std::pair<uint32_t, uint32_t>(0xFEFF, 0xFEFF), // NOLINTNEXTLINE(readability-magic-numbers)
44 });
45
46 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
47 static RangeSet g_rangeW({
48 std::pair<uint32_t, uint32_t>(0x0030, 0x0039), // NOLINTNEXTLINE(readability-magic-numbers)
49 std::pair<uint32_t, uint32_t>(0x0041, 0x005A), // NOLINTNEXTLINE(readability-magic-numbers)
50 std::pair<uint32_t, uint32_t>(0x005F, 0x005F), // NOLINTNEXTLINE(readability-magic-numbers)
51 std::pair<uint32_t, uint32_t>(0x0061, 0x007A), // NOLINTNEXTLINE(readability-magic-numbers)
52 });
53
54 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
55 static RangeSet g_regexpIdentifyStart({
56 std::pair<uint32_t, uint32_t>(0x0024, 0x0024), // NOLINTNEXTLINE(readability-magic-numbers)
57 std::pair<uint32_t, uint32_t>(0x0041, 0x005A), // NOLINTNEXTLINE(readability-magic-numbers)
58 std::pair<uint32_t, uint32_t>(0x0061, 0x007A), // NOLINTNEXTLINE(readability-magic-numbers)
59 });
60
61 // NOLINTNEXTLINE(fuchsia-statically-constructed-objects)
62 static RangeSet g_regexpIdentifyContinue({
63 std::pair<uint32_t, uint32_t>(0x0024, 0x0024), // NOLINTNEXTLINE(readability-magic-numbers)
64 std::pair<uint32_t, uint32_t>(0x0030, 0x0039), // NOLINTNEXTLINE(readability-magic-numbers)
65 std::pair<uint32_t, uint32_t>(0x0041, 0x005A), // NOLINTNEXTLINE(readability-magic-numbers)
66 std::pair<uint32_t, uint32_t>(0x0061, 0x007A), // NOLINTNEXTLINE(readability-magic-numbers)
67 });
68
Parse()69 void RegExpParser::Parse()
70 {
71 // dynbuffer head init [size,capture_count,statck_count,flags]
72 buffer_.EmitU32(0);
73 buffer_.EmitU32(0);
74 buffer_.EmitU32(0);
75 buffer_.EmitU32(0);
76 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
77 PrintF("Parse Pattern------\n");
78 // Pattern[U, N]::
79 // Disjunction[?U, ?N]
80 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
81 Advance();
82 SaveStartOpCode saveStartOp;
83 int captureIndex = captureCount_++;
84 saveStartOp.EmitOpCode(&buffer_, captureIndex);
85 ParseDisjunction(false);
86 if (c0_ != KEY_EOF) {
87 ParseError("extraneous characters at the end");
88 return;
89 }
90 SaveEndOpCode saveEndOp;
91 saveEndOp.EmitOpCode(&buffer_, captureIndex);
92 MatchEndOpCode matchEndOp;
93 matchEndOp.EmitOpCode(&buffer_, 0);
94 // dynbuffer head assignments
95 buffer_.PutU32(0, buffer_.size_);
96 buffer_.PutU32(NUM_CAPTURE__OFFSET, captureCount_);
97 buffer_.PutU32(NUM_STACK_OFFSET, stackCount_);
98 buffer_.PutU32(FLAGS_OFFSET, flags_);
99 #ifndef _NO_DEBUG_
100 RegExpOpCode::DumpRegExpOpCode(std::cout, buffer_);
101 #endif
102 }
103
ParseDisjunction(bool isBackward)104 void RegExpParser::ParseDisjunction(bool isBackward)
105 {
106 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
107 PrintF("Parse Disjunction------\n");
108 size_t start = buffer_.size_;
109 ParseAlternative(isBackward);
110 if (isError_) {
111 return;
112 }
113 do {
114 if (c0_ == '|') {
115 SplitNextOpCode splitOp;
116 uint32_t len = buffer_.size_ - start;
117 GotoOpCode gotoOp;
118 splitOp.InsertOpCode(&buffer_, start, len + gotoOp.GetSize());
119 uint32_t pos = gotoOp.EmitOpCode(&buffer_, 0) - gotoOp.GetSize();
120 Advance();
121 ParseAlternative(isBackward);
122 gotoOp.UpdateOpPara(&buffer_, pos, buffer_.size_ - pos - gotoOp.GetSize());
123 }
124 } while (c0_ != KEY_EOF && c0_ != ')');
125 }
126
ParseOctalLiteral()127 uint32_t RegExpParser::ParseOctalLiteral()
128 {
129 // For compatibility with some other browsers (not all), we parse
130 // up to three octal digits with a value below 256.
131 // ES#prod-annexB-LegacyOctalEscapeSequence
132 uint32_t value = c0_ - '0';
133 Advance();
134 if (c0_ >= '0' && c0_ <= '7') {
135 value = value * OCTAL_VALUE + c0_ - '0';
136 Advance();
137 if (value < OCTAL_VALUE_RANGE && c0_ >= '0' && c0_ <= '7') {
138 value = value * OCTAL_VALUE + c0_ - '0';
139 Advance();
140 }
141 }
142 return value;
143 }
144
ParseUnlimitedLengthHexNumber(uint32_t maxValue,uint32_t * value)145 bool RegExpParser::ParseUnlimitedLengthHexNumber(uint32_t maxValue, uint32_t *value)
146 {
147 uint32_t x = 0;
148 int d = HexValue(c0_);
149 if (d < 0) {
150 return false;
151 }
152 while (d >= 0) {
153 if (UNLIKELY(x > (std::numeric_limits<uint32_t>::max() - d) / HEX_VALUE)) {
154 LOG_ECMA(FATAL) << "value overflow";
155 return false;
156 }
157 x = x * HEX_VALUE + d;
158 if (x > maxValue) {
159 return false;
160 }
161 Advance();
162 d = HexValue(c0_);
163 }
164 *value = x;
165 return true;
166 }
167
168 // This parses RegExpUnicodeEscapeSequence as described in ECMA262.
ParseUnicodeEscape(uint32_t * value)169 bool RegExpParser::ParseUnicodeEscape(uint32_t *value)
170 {
171 // Accept both \uxxxx and \u{xxxxxx} (if allowed).
172 // In the latter case, the number of hex digits between { } is arbitrary.
173 // \ and u have already been read.
174 if (c0_ == '{' && IsUtf16()) {
175 uint8_t *start = pc_ - 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
176 Advance();
177 if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) { // NOLINTNEXTLINE(readability-magic-numbers)
178 if (c0_ == '}') {
179 Advance();
180 return true;
181 }
182 }
183 pc_ = start;
184 Advance();
185 return false;
186 }
187 // \u but no {, or \u{...} escapes not allowed.
188 bool result = ParseHexEscape(UNICODE_HEX_VALUE, value);
189 if (result && IsUtf16() && U16_IS_LEAD(*value) && c0_ == '\\') {
190 // Attempt to read trail surrogate.
191 uint8_t *start = pc_ - 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
192 if (*pc_ == 'u') {
193 Advance(UNICODE_HEX_ADVANCE);
194 uint32_t trail;
195 if (ParseHexEscape(UNICODE_HEX_VALUE, &trail) && U16_IS_TRAIL(trail)) {
196 *value = U16_GET_SUPPLEMENTARY((*value), (trail)); // NOLINTNEXTLINE(hicpp-signed-bitwise)
197 return true;
198 }
199 }
200 pc_ = start;
201 Advance();
202 }
203 return result;
204 }
205
ParseHexEscape(int length,uint32_t * value)206 bool RegExpParser::ParseHexEscape(int length, uint32_t *value)
207 {
208 uint8_t *start = pc_ - 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
209 uint32_t val = 0;
210 for (int i = 0; i < length; ++i) {
211 uint32_t c = c0_;
212 int d = HexValue(c);
213 if (d < 0) {
214 pc_ = start;
215 Advance();
216 return false;
217 }
218 val = val * HEX_VALUE + d;
219 Advance();
220 }
221 *value = val;
222 return true;
223 }
224
225 // NOLINTNEXTLINE(readability-function-size)
ParseAlternative(bool isBackward)226 void RegExpParser::ParseAlternative(bool isBackward)
227 {
228 size_t start = buffer_.size_;
229 while (c0_ != '|' && c0_ != KEY_EOF && c0_ != ')') {
230 if (isError_) {
231 return;
232 }
233 size_t atomBcStart = buffer_.GetSize();
234 int captureIndex = 0;
235 bool isAtom = false;
236 switch (c0_) {
237 case '^': {
238 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
239 PrintF("Assertion %c line start \n", c0_);
240 LineStartOpCode lineStartOp;
241 lineStartOp.EmitOpCode(&buffer_, 0);
242 Advance();
243 break;
244 }
245 case '$': {
246 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
247 PrintF("Assertion %c line end \n", c0_);
248 LineEndOpCode lineEndOp;
249 lineEndOp.EmitOpCode(&buffer_, 0);
250 Advance();
251 break;
252 }
253 case '\\': {
254 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
255 PrintF("Escape %c \n", c0_);
256 Advance();
257 switch (c0_) {
258 case 'b': {
259 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
260 PrintF("Assertion %c \n", c0_);
261 WordBoundaryOpCode wordBoundaryOp;
262 wordBoundaryOp.EmitOpCode(&buffer_, 0);
263 Advance();
264 break;
265 }
266 case 'B': {
267 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
268 PrintF("Assertion %c \n", c0_);
269 NotWordBoundaryOpCode notWordBoundaryOp;
270 notWordBoundaryOp.EmitOpCode(&buffer_, 0);
271 Advance();
272 break;
273 }
274 default: {
275 isAtom = true;
276 int atomValue = ParseAtomEscape(isBackward);
277 if (atomValue != -1) {
278 if (IsIgnoreCase()) {
279 if (!IsUtf16()) {
280 atomValue = Canonicalize(atomValue, false);
281 } else {
282 icu::UnicodeSet set(atomValue, atomValue);
283 set.closeOver(USET_CASE_INSENSITIVE);
284 set.removeAllStrings();
285 int32_t size = set.size();
286 RangeOpCode rangeOp;
287 RangeSet rangeResult;
288 for (int32_t idx = 0; idx < size; idx++) {
289 int32_t uc = set.charAt(idx);
290 RangeSet curRange(uc);
291 rangeResult.Insert(curRange);
292 }
293 rangeOp.InsertOpCode(&buffer_, rangeResult);
294 break;
295 }
296 }
297 if (atomValue <= UINT16_MAX) {
298 CharOpCode charOp;
299 charOp.EmitOpCode(&buffer_, atomValue);
300 } else {
301 Char32OpCode charOp;
302 charOp.EmitOpCode(&buffer_, atomValue);
303 }
304 }
305 break;
306 }
307 }
308 break;
309 }
310 case '(': {
311 Advance();
312 isAtom = ParseAssertionCapture(&captureIndex, isBackward);
313 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
314 Advance();
315 break;
316 }
317 case '.': {
318 PrevOpCode prevOp;
319 if (isBackward) {
320 prevOp.EmitOpCode(&buffer_, 0);
321 }
322 if (IsDotAll()) {
323 AllOpCode allOp;
324 allOp.EmitOpCode(&buffer_, 0);
325 } else {
326 DotsOpCode dotsOp;
327 dotsOp.EmitOpCode(&buffer_, 0);
328 }
329 if (isBackward) {
330 prevOp.EmitOpCode(&buffer_, 0);
331 }
332 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
333 PrintF("Atom %c match any \n", c0_);
334 isAtom = true;
335 Advance();
336 break;
337 }
338 case '[': {
339 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
340 PrintF("Atom %c match range \n", c0_);
341 isAtom = true;
342 PrevOpCode prevOp;
343 Advance();
344 if (isBackward) {
345 prevOp.EmitOpCode(&buffer_, 0);
346 }
347 bool isInvert = false;
348 if (c0_ == '^') {
349 isInvert = true;
350 Advance();
351 }
352 RangeSet rangeResult;
353 if (!ParseClassRanges(&rangeResult)) {
354 break;
355 }
356 if (isInvert) {
357 rangeResult.Invert(IsUtf16());
358 }
359 uint32_t highValue = rangeResult.HighestValue();
360 if (highValue <= UINT16_MAX) {
361 RangeOpCode rangeOp;
362 rangeOp.InsertOpCode(&buffer_, rangeResult);
363 } else {
364 Range32OpCode rangeOp;
365 rangeOp.InsertOpCode(&buffer_, rangeResult);
366 }
367
368 if (isBackward) {
369 prevOp.EmitOpCode(&buffer_, 0);
370 }
371 break;
372 }
373 case '*':
374 case '+':
375 case '?': {
376 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
377 ParseError("nothing to repeat");
378 return;
379 }
380 case '{': {
381 uint8_t *begin = pc_ - 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
382 int dummy;
383 if (ParserIntervalQuantifier(&dummy, &dummy)) {
384 ParseError("nothing to repeat");
385 return;
386 }
387 pc_ = begin;
388 Advance();
389 [[fallthrough]];
390 }
391 case '}':
392 case ']': {
393 if (IsUtf16()) {
394 ParseError("syntax error");
395 return;
396 }
397 [[fallthrough]];
398 }
399 default: {
400 // PatternCharacter
401 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
402 PrintF("PatternCharacter %c\n", c0_);
403 isAtom = true;
404 {
405 PrevOpCode prevOp;
406 if (isBackward) {
407 prevOp.EmitOpCode(&buffer_, 0);
408 }
409 uint32_t matchedChar = c0_;
410 if (c0_ > (INT8_MAX + 1)) {
411 Prev();
412 int i = 0;
413 UChar32 c;
414 int32_t length = end_ - pc_ + 1;
415 // NOLINTNEXTLINE(hicpp-signed-bitwise)
416 U8_NEXT(pc_, i, length, c); // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
417 matchedChar = c;
418 pc_ += i; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
419 }
420 if (IsIgnoreCase()) {
421 matchedChar = Canonicalize(matchedChar, IsUtf16());
422 }
423 if (matchedChar > UINT16_MAX) {
424 Char32OpCode charOp;
425 charOp.EmitOpCode(&buffer_, matchedChar);
426 } else {
427 CharOpCode charOp;
428 charOp.EmitOpCode(&buffer_, matchedChar);
429 }
430 if (isBackward) {
431 prevOp.EmitOpCode(&buffer_, 0);
432 }
433 }
434 Advance();
435 break;
436 }
437 }
438 if (isAtom && !isError_) {
439 ParseQuantifier(atomBcStart, captureIndex, captureCount_ - 1);
440 }
441 if (isBackward) {
442 size_t end = buffer_.GetSize();
443 size_t termSize = end - atomBcStart;
444 size_t moveSize = end - start;
445 buffer_.Expand(end + termSize);
446 if (memmove_s(buffer_.buf_ + start + // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
447 termSize, // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
448 moveSize,
449 buffer_.buf_ + start, // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
450 moveSize) != EOK) {
451 LOG_ECMA(FATAL) << "memmove_s failed";
452 UNREACHABLE();
453 }
454 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
455 if (memcpy_s(buffer_.buf_ + start, termSize, buffer_.buf_ + end, termSize) != EOK) {
456 LOG_ECMA(FATAL) << "memcpy_s failed";
457 UNREACHABLE();
458 }
459 }
460 }
461 }
462
FindGroupName(const CString & name)463 int RegExpParser::FindGroupName(const CString &name)
464 {
465 size_t len = 0;
466 size_t nameLen = name.size();
467 const char *p = reinterpret_cast<char *>(groupNames_.buf_);
468 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
469 const char *bufEnd = reinterpret_cast<char *>(groupNames_.buf_) + groupNames_.size_;
470 int captureIndex = 1;
471 while (p < bufEnd) {
472 len = strlen(p);
473 if (len == nameLen && memcmp(name.c_str(), p, nameLen) == 0) {
474 return captureIndex;
475 }
476 p += len + 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
477 captureIndex++;
478 }
479 return -1;
480 }
481
ParseAssertionCapture(int * captureIndex,bool isBackward)482 bool RegExpParser::ParseAssertionCapture(int *captureIndex, bool isBackward)
483 {
484 bool isAtom = false;
485 do {
486 if (c0_ == '?') {
487 Advance();
488 switch (c0_) {
489 // (?=Disjunction[?U, ?N])
490 case '=': {
491 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
492 PrintF("Assertion(?= Disjunction)\n");
493 Advance();
494 uint32_t start = buffer_.size_;
495 ParseDisjunction(isBackward);
496 MatchOpCode matchOp;
497 matchOp.EmitOpCode(&buffer_, 0);
498 MatchAheadOpCode matchAheadOp;
499 uint32_t len = buffer_.size_ - start;
500 matchAheadOp.InsertOpCode(&buffer_, start, len);
501 break;
502 }
503 // (?!Disjunction[?U, ?N])
504 case '!': {
505 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
506 PrintF("Assertion(?! Disjunction)\n");
507 uint32_t start = buffer_.size_;
508 Advance();
509 ParseDisjunction(isBackward);
510 MatchOpCode matchOp;
511 matchOp.EmitOpCode(&buffer_, 0);
512 NegativeMatchAheadOpCode matchAheadOp;
513 uint32_t len = buffer_.size_ - start;
514 matchAheadOp.InsertOpCode(&buffer_, start, len);
515 break;
516 }
517 case '<': {
518 Advance();
519 // (?<=Disjunction[?U, ?N])
520 if (c0_ == '=') {
521 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
522 PrintF("Assertion(?<= Disjunction)\n");
523 Advance();
524 uint32_t start = buffer_.size_;
525 ParseDisjunction(true);
526 MatchOpCode matchOp;
527 matchOp.EmitOpCode(&buffer_, 0);
528 MatchAheadOpCode matchAheadOp;
529 uint32_t len = buffer_.size_ - start;
530 matchAheadOp.InsertOpCode(&buffer_, start, len);
531 // (?<!Disjunction[?U, ?N])
532 } else if (c0_ == '!') {
533 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
534 PrintF("Assertion(?<! Disjunction)\n");
535 Advance();
536 uint32_t start = buffer_.size_;
537 ParseDisjunction(true);
538 MatchOpCode matchOp;
539 matchOp.EmitOpCode(&buffer_, 0);
540 NegativeMatchAheadOpCode matchAheadOp;
541 uint32_t len = buffer_.size_ - start;
542 matchAheadOp.InsertOpCode(&buffer_, start, len);
543 } else {
544 Prev();
545 CString name;
546 auto **pp = const_cast<const uint8_t **>(&pc_);
547 if (!ParseGroupSpecifier(pp, name)) {
548 ParseError("GroupName Syntax error.");
549 return false;
550 }
551 if (FindGroupName(name) > 0) {
552 ParseError("Duplicate GroupName error.");
553 return false;
554 }
555 groupNames_.EmitStr(name.c_str());
556 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
557 PrintF("group name %s", name.c_str());
558 Advance();
559 goto parseCapture; // NOLINTNEXTLINE(cppcoreguidelines-avoid-goto)
560 }
561 break;
562 }
563 // (?:Disjunction[?U, ?N])
564 case ':': {
565 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
566 PrintF("Atom(?<: Disjunction)\n");
567 isAtom = true;
568 Advance();
569 ParseDisjunction(isBackward);
570 break;
571 }
572 default: {
573 Advance();
574 ParseError("? Syntax error.");
575 return false;
576 }
577 }
578 } else {
579 groupNames_.EmitChar(0);
580 parseCapture:
581 isAtom = true;
582 *captureIndex = captureCount_++;
583 SaveEndOpCode saveEndOp;
584 SaveStartOpCode saveStartOp;
585 if (isBackward) {
586 saveEndOp.EmitOpCode(&buffer_, *captureIndex);
587 } else {
588 saveStartOp.EmitOpCode(&buffer_, *captureIndex);
589 }
590 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
591 PrintF("capture start %d \n", *captureIndex);
592 ParseDisjunction(isBackward);
593 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
594 PrintF("capture end %d \n", *captureIndex);
595 if (isBackward) {
596 saveStartOp.EmitOpCode(&buffer_, *captureIndex);
597 } else {
598 saveEndOp.EmitOpCode(&buffer_, *captureIndex);
599 }
600 }
601 } while (c0_ != ')' && c0_ != KEY_EOF);
602 if (c0_ != ')') {
603 ParseError("capture syntax error");
604 return false;
605 }
606 return isAtom;
607 }
608
ParseDecimalDigits()609 int RegExpParser::ParseDecimalDigits()
610 {
611 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
612 PrintF("Parse DecimalDigits------\n");
613 int result = 0;
614 bool overflow = false;
615 while (true) {
616 if (c0_ < '0' || c0_ > '9') {
617 break;
618 }
619 if (!overflow) {
620 if (UNLIKELY(result > (INT32_MAX - c0_ + '0') / DECIMAL_DIGITS_ADVANCE)) {
621 overflow = true;
622 } else {
623 result = result * DECIMAL_DIGITS_ADVANCE + c0_ - '0';
624 }
625 }
626 Advance();
627 }
628 if (overflow) {
629 return INT32_MAX;
630 }
631 return result;
632 }
633
ParserIntervalQuantifier(int * pmin,int * pmax)634 bool RegExpParser::ParserIntervalQuantifier(int *pmin, int *pmax)
635 {
636 // Quantifier::
637 // QuantifierPrefix
638 // QuantifierPrefix?
639 // QuantifierPrefix::
640 // *
641 // +
642 // ?
643 // {DecimalDigits}
644 // {DecimalDigits,}
645 // {DecimalDigits,DecimalDigits}
646 Advance();
647 *pmin = ParseDecimalDigits();
648 *pmax = *pmin;
649 switch (c0_) {
650 case ',': {
651 Advance();
652 if (c0_ == '}') {
653 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
654 PrintF("QuantifierPrefix{DecimalDigits,}\n");
655 *pmax = INT32_MAX;
656 Advance();
657 } else {
658 *pmax = ParseDecimalDigits();
659 if (c0_ == '}') {
660 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
661 PrintF("QuantifierPrefix{DecimalDigits,DecimalDigits}\n");
662 Advance();
663 } else {
664 return false;
665 }
666 }
667 break;
668 }
669 case '}': {
670 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
671 PrintF("QuantifierPrefix{DecimalDigits}\n");
672 Advance();
673 break;
674 }
675 default: {
676 Advance();
677 return false;
678 }
679 }
680 return true;
681 }
682
ParseQuantifier(size_t atomBcStart,int captureStart,int captureEnd)683 void RegExpParser::ParseQuantifier(size_t atomBcStart, int captureStart, int captureEnd)
684 {
685 int min = -1;
686 int max = -1;
687 bool isGreedy = true;
688 switch (c0_) {
689 case '*':
690 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
691 PrintF("QuantifierPrefix %c\n", c0_);
692 min = 0;
693 max = INT32_MAX;
694 Advance();
695 break;
696 case '+':
697 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
698 PrintF("QuantifierPrefix %c\n", c0_);
699 min = 1;
700 max = INT32_MAX;
701 Advance();
702 break;
703 case '?':
704 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
705 PrintF("QuantifierPrefix %c\n", c0_);
706 Advance();
707 min = 0;
708 max = 1;
709 break;
710 case '{': {
711 uint8_t *start = pc_ - 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
712 if (!ParserIntervalQuantifier(&min, &max)) {
713 pc_ = start;
714 Advance(); // back to '{'
715 return;
716 }
717 if (min > max) {
718 ParseError("Invalid repetition count");
719 return;
720 }
721 break;
722 }
723 default:
724 break;
725 }
726 if (c0_ == '?') {
727 isGreedy = false;
728 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
729 PrintF("Quantifier::QuantifierPrefix?\n");
730 Advance();
731 } else if (c0_ == '?' || c0_ == '+' || c0_ == '*' || c0_ == '{') {
732 ParseError("nothing to repeat");
733 return;
734 }
735 if (min != -1 && max != -1) {
736 stackCount_++;
737 PushOpCode pushOp;
738 pushOp.InsertOpCode(&buffer_, atomBcStart);
739 atomBcStart += pushOp.GetSize();
740
741 if (captureStart != 0) {
742 SaveResetOpCode saveResetOp;
743 saveResetOp.InsertOpCode(&buffer_, atomBcStart, captureStart, captureEnd);
744 }
745
746 // zero advance check
747 if (max == INT32_MAX) {
748 stackCount_++;
749 PushCharOpCode pushCharOp;
750 pushCharOp.InsertOpCode(&buffer_, atomBcStart);
751 CheckCharOpCode checkCharOp;
752 // NOLINTNEXTLINE(readability-magic-numbers)
753 checkCharOp.EmitOpCode(&buffer_, RegExpOpCode::GetRegExpOpCode(RegExpOpCode::OP_LOOP)->GetSize());
754 }
755
756 if (isGreedy) {
757 LoopGreedyOpCode loopOp;
758 loopOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - loopOp.GetSize(), min, max);
759 } else {
760 LoopOpCode loopOp;
761 loopOp.EmitOpCode(&buffer_, atomBcStart - buffer_.GetSize() - loopOp.GetSize(), min, max);
762 }
763
764 if (min == 0) {
765 if (isGreedy) {
766 SplitNextOpCode splitNextOp;
767 splitNextOp.InsertOpCode(&buffer_, atomBcStart, buffer_.GetSize() - atomBcStart);
768 } else {
769 SplitFirstOpCode splitFirstOp;
770 splitFirstOp.InsertOpCode(&buffer_, atomBcStart, buffer_.GetSize() - atomBcStart);
771 }
772 }
773
774 PopOpCode popOp;
775 popOp.EmitOpCode(&buffer_);
776 }
777 }
778
ParseGroupSpecifier(const uint8_t ** pp,CString & name)779 bool RegExpParser::ParseGroupSpecifier(const uint8_t **pp, CString &name)
780 {
781 const uint8_t *p = *pp;
782 int c = *p;
783 while (c != '>') {
784 if (c < (INT8_MAX + 1)) {
785 if (name.empty()) {
786 if (!g_regexpIdentifyStart.IsContain(c)) {
787 return false;
788 }
789 } else {
790 if (!g_regexpIdentifyContinue.IsContain(c)) {
791 return false;
792 }
793 }
794 name += static_cast<char>(c);
795 }
796 c = *++p; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
797 }
798 p++; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
799 *pp = p;
800 return true;
801 }
802
ParseCaptureCount(const char * groupName)803 int RegExpParser::ParseCaptureCount(const char *groupName)
804 {
805 const uint8_t *p;
806 int captureIndex = 1;
807 CString name;
808 for (p = base_; p < end_; p++) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
809 switch (*p) {
810 case '(': {
811 if (p[1] == '?') { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
812 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
813 if (p[CAPTURE_CONUT_ADVANCE - 1] == '<' && p[CAPTURE_CONUT_ADVANCE] != '!' &&
814 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
815 p[CAPTURE_CONUT_ADVANCE] != '=') {
816 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
817 p += CAPTURE_CONUT_ADVANCE;
818 if (groupName != nullptr) {
819 if (ParseGroupSpecifier(&p, name)) {
820 if (strcmp(name.c_str(), groupName) == 0) {
821 return captureIndex;
822 }
823 }
824 }
825 captureIndex++;
826 }
827 } else {
828 captureIndex++;
829 }
830 break;
831 }
832 case '\\': {
833 p++; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
834 break;
835 }
836 case '[': {
837 while (p < end_ && *p != ']') {
838 if (*p == '\\') {
839 p++; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
840 }
841 p++; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
842 }
843 break;
844 }
845 default:
846 break;
847 }
848 }
849 return captureIndex;
850 }
851
852 // NOLINTNEXTLINE(readability-function-size)
ParseAtomEscape(bool isBackward)853 int RegExpParser::ParseAtomEscape(bool isBackward)
854 {
855 // AtomEscape[U, N]::
856 // DecimalEscape
857 // CharacterClassEscape[?U]
858 // CharacterEscape[?U]
859 // [+N]kGroupName[?U]
860 int result = -1;
861 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
862 PrintF("Parse AtomEscape------\n");
863 switch (c0_) {
864 case KEY_EOF: {
865 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
866 ParseError("unexpected end");
867 break;
868 }
869 // DecimalEscape
870 case '1':
871 case '2':
872 case '3':
873 case '4':
874 case '5':
875 case '6':
876 case '7':
877 case '8':
878 case '9': {
879 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
880 PrintF("NonZeroDigit %c\n", c0_);
881 int capture = ParseDecimalDigits();
882 if (capture > captureCount_ - 1 && capture > ParseCaptureCount(nullptr) - 1) {
883 ParseError("invalid backreference count");
884 break;
885 }
886 if (isBackward) {
887 BackwardBackReferenceOpCode backReferenceOp;
888 backReferenceOp.EmitOpCode(&buffer_, capture);
889 } else {
890 BackReferenceOpCode backReferenceOp;
891 backReferenceOp.EmitOpCode(&buffer_, capture);
892 }
893 break;
894 }
895 // CharacterClassEscape
896 case 'd': {
897 // [0-9]
898 RangeOpCode rangeOp;
899 rangeOp.InsertOpCode(&buffer_, g_rangeD);
900 Advance();
901 break;
902 }
903 case 'D': {
904 // [^0-9]
905 RangeSet atomRange(g_rangeD);
906 atomRange.Invert(IsUtf16());
907 Range32OpCode rangeOp;
908 rangeOp.InsertOpCode(&buffer_, atomRange);
909 Advance();
910 break;
911 }
912 case 's': {
913 // [\f\n\r\t\v]
914 RangeOpCode rangeOp;
915 rangeOp.InsertOpCode(&buffer_, g_rangeS);
916 Advance();
917 break;
918 }
919 case 'S': {
920 RangeSet atomRange(g_rangeS);
921 atomRange.Invert(IsUtf16());
922 Range32OpCode rangeOp;
923 rangeOp.InsertOpCode(&buffer_, atomRange);
924 Advance();
925 break;
926 }
927 case 'w': {
928 // [A-Za-z0-9]
929 RangeOpCode rangeOp;
930 rangeOp.InsertOpCode(&buffer_, g_rangeW);
931 Advance();
932 break;
933 }
934 case 'W': {
935 // [^A-Za-z0-9]
936 RangeSet atomRange(g_rangeW);
937 atomRange.Invert(IsUtf16());
938 Range32OpCode rangeOp;
939 rangeOp.InsertOpCode(&buffer_, atomRange);
940 Advance();
941 break;
942 }
943 // P{UnicodePropertyValueExpression}
944 // p{UnicodePropertyValueExpression}
945 case 'P':
946 case 'p':
947 // [+N]kGroupName[?U]
948 case 'k':
949 default: {
950 result = ParseCharacterEscape();
951 break;
952 }
953 }
954 return result;
955 }
956
ParseCharacterEscape()957 int RegExpParser::ParseCharacterEscape()
958 {
959 // CharacterEscape[U]::
960 // ControlEscape
961 // c ControlLetter
962 // 0 [lookahead ∉ DecimalDigit]
963 // HexEscapeSequence
964 // RegExpUnicodeEscapeSequence[?U]
965 // IdentityEscape[?U]
966 uint32_t result = 0;
967 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
968 switch (c0_) {
969 // ControlEscape
970 case 'f': {
971 result = '\f';
972 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
973 PrintF("ControlEscape %c\n", c0_);
974 Advance();
975 break;
976 }
977 case 'n': {
978 result = '\n';
979 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
980 PrintF("ControlEscape %c\n", c0_);
981 Advance();
982 break;
983 }
984 case 'r': {
985 result = '\r';
986 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
987 PrintF("ControlEscape %c\n", c0_);
988 Advance();
989 break;
990 }
991 case 't': {
992 result = '\t';
993 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
994 PrintF("ControlEscape %c\n", c0_);
995 Advance();
996 break;
997 }
998 case 'v': {
999 result = '\v';
1000 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1001 PrintF("ControlEscape %c\n", c0_);
1002 Advance();
1003 break;
1004 }
1005 // c ControlLetter
1006 case 'c': {
1007 Advance();
1008 if ((c0_ >= 'A' && c0_ <= 'Z') || (c0_ >= 'a' && c0_ <= 'z')) {
1009 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1010 PrintF("ControlLetter %c\n", c0_);
1011 result = static_cast<uint32_t>(c0_) & 0x1f; // NOLINTNEXTLINE(readability-magic-numbers)
1012 Advance();
1013 } else {
1014 if (!IsUtf16()) {
1015 pc_--; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1016 result = '\\';
1017 } else {
1018 ParseError("Invalid control letter");
1019 return -1;
1020 }
1021 }
1022 break;
1023 }
1024 case '0': {
1025 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1026 PrintF("CharacterEscape 0 [lookahead ∉ DecimalDigit]\n");
1027 if (IsUtf16() && !(*pc_ >= '0' && *pc_ <= '9')) { // NOLINTNEXTLINE(readability-magic-numbers)
1028 Advance();
1029 result = 0;
1030 break;
1031 }
1032 [[fallthrough]];
1033 }
1034 case '1':
1035 case '2':
1036 case '3':
1037 case '4':
1038 case '5':
1039 case '6':
1040 case '7': {
1041 if (IsUtf16()) {
1042 // With /u, decimal escape is not interpreted as octal character code.
1043 ParseError("Invalid class escape");
1044 return 0;
1045 }
1046 result = ParseOctalLiteral();
1047 break;
1048 }
1049 // ParseHexEscapeSequence
1050 // ParseRegExpUnicodeEscapeSequence
1051 case 'x': {
1052 Advance();
1053 if (ParseHexEscape(UNICODE_HEX_ADVANCE, &result)) {
1054 return result;
1055 }
1056 if (IsUtf16()) {
1057 ParseError("Invalid class escape");
1058 return -1;
1059 }
1060 result = 'x';
1061 break;
1062 }
1063 case 'u': {
1064 Advance();
1065 if (ParseUnicodeEscape(&result)) {
1066 return result;
1067 }
1068 if (IsUtf16()) {
1069 // With /u, invalid escapes are not treated as identity escapes.
1070 ParseError("Invalid unicode escape");
1071 return 0;
1072 }
1073 // If \u is not followed by a two-digit hexadecimal, treat it
1074 // as an identity escape.
1075 result = 'u';
1076 break;
1077 }
1078 // IdentityEscape[?U]
1079 case '$':
1080 case '(':
1081 case ')':
1082 case '*':
1083 case '+':
1084 case '.':
1085 case '/':
1086 case '?':
1087 case '[':
1088 case '\\':
1089 case ']':
1090 case '^':
1091 case '{':
1092 case '|':
1093 case '}': {
1094 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1095 PrintF("IdentityEscape %c\n", c0_);
1096 result = c0_;
1097 Advance();
1098 break;
1099 }
1100 default: {
1101 if (IsUtf16()) {
1102 ParseError("Invalid unicode escape");
1103 return 0;
1104 }
1105 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1106 PrintF("SourceCharacter %c\n", c0_);
1107 result = c0_;
1108 Advance();
1109 break;
1110 }
1111 }
1112 return result;
1113 }
1114
ParseClassRanges(RangeSet * result)1115 bool RegExpParser::ParseClassRanges(RangeSet *result)
1116 {
1117 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1118 PrintF("Parse ClassRanges------\n");
1119 while (c0_ != ']') {
1120 RangeSet s1;
1121 uint32_t c1 = ParseClassAtom(&s1);
1122 if (c1 == UINT32_MAX) {
1123 ParseError("invalid class range");
1124 return false;
1125 }
1126
1127 int next_c0 = *pc_;
1128 if (c0_ == '-' && next_c0 != ']') {
1129 if (c1 == CLASS_RANGE_BASE) {
1130 if (IsUtf16()) {
1131 ParseError("invalid class range");
1132 return false;
1133 }
1134 result->Insert(s1);
1135 continue;
1136 }
1137 Advance();
1138 RangeSet s2;
1139 uint32_t c2 = ParseClassAtom(&s2);
1140 if (c2 == UINT32_MAX) {
1141 ParseError("invalid class range");
1142 return false;
1143 }
1144 if (c2 == CLASS_RANGE_BASE) {
1145 if (IsUtf16()) {
1146 ParseError("invalid class range");
1147 return false;
1148 }
1149 result->Insert(s2);
1150 continue;
1151 }
1152
1153 if (c1 > c2) {
1154 ParseError("invalid class range");
1155 return false;
1156 }
1157 if (IsIgnoreCase()) {
1158 c1 = Canonicalize(c1, IsUtf16());
1159 c2 = Canonicalize(c2, IsUtf16());
1160 }
1161
1162 result->Insert(c1, c2);
1163 } else {
1164 result->Insert(s1);
1165 }
1166 }
1167 Advance();
1168 return true;
1169 }
1170
ParseClassAtom(RangeSet * atom)1171 uint32_t RegExpParser::ParseClassAtom(RangeSet *atom)
1172 {
1173 uint32_t ret = UINT32_MAX;
1174 switch (c0_) {
1175 case '\\': {
1176 Advance();
1177 ret = ParseClassEscape(atom);
1178 break;
1179 }
1180 case KEY_EOF:
1181 break;
1182 case 0: {
1183 if (pc_ >= end_) {
1184 return UINT32_MAX;
1185 }
1186 [[fallthrough]];
1187 }
1188 default: {
1189 uint32_t value = c0_;
1190 int u16_size = 0;
1191 if (c0_ > INT8_MAX) { // NOLINTNEXTLINE(readability-magic-numbers)
1192 pc_ -= 1; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1193 auto u16_result = base::utf_helper::ConvertUtf8ToUtf16Pair(pc_, true);
1194 value = u16_result.first;
1195 u16_size = u16_result.second;
1196 Advance(u16_size + 1);
1197 } else {
1198 Advance();
1199 }
1200 if (IsIgnoreCase()) {
1201 value = Canonicalize(value, IsUtf16());
1202 }
1203 atom->Insert(RangeSet(value));
1204 ret = value;
1205 break;
1206 }
1207 }
1208 return ret;
1209 }
1210
ParseClassEscape(RangeSet * atom)1211 int RegExpParser::ParseClassEscape(RangeSet *atom)
1212 {
1213 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1214 PrintF("Parse ClassEscape------\n");
1215 int result = -1;
1216 switch (c0_) {
1217 case 'b': {
1218 Advance();
1219 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1220 PrintF("ClassEscape %c", 'b');
1221 result = '\b';
1222 atom->Insert(RangeSet(static_cast<uint32_t>('\b')));
1223 break;
1224 }
1225 case '-': {
1226 Advance();
1227 result = '-';
1228 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1229 PrintF("ClassEscape %c", '-');
1230 atom->Insert(RangeSet(static_cast<uint32_t>('-')));
1231 break;
1232 }
1233 // CharacterClassEscape
1234 case 'd':
1235 case 'D': {
1236 result = CLASS_RANGE_BASE;
1237 atom->Insert(g_rangeD);
1238 if (c0_ == 'D') {
1239 atom->Invert(IsUtf16());
1240 }
1241 Advance();
1242 break;
1243 }
1244 case 's':
1245 case 'S': {
1246 result = CLASS_RANGE_BASE;
1247 atom->Insert(g_rangeS);
1248 if (c0_ == 'S') {
1249 atom->Invert(IsUtf16());
1250 }
1251 Advance();
1252 break;
1253 }
1254 case 'w':
1255 case 'W': {
1256 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1257 PrintF("ClassEscape::CharacterClassEscape %c\n", c0_);
1258 result = CLASS_RANGE_BASE;
1259 atom->Insert(g_rangeW);
1260 if (c0_ == 'W') {
1261 atom->Invert(IsUtf16());
1262 }
1263 Advance();
1264 break;
1265 }
1266 // P{UnicodePropertyValueExpression}
1267 // p{UnicodePropertyValueExpression}
1268 case 'P':
1269 case 'p': {
1270 Advance();
1271 if (c0_ == '{') {
1272 Advance();
1273 bool isValue = false;
1274 ParseUnicodePropertyValueCharacters(&isValue);
1275 if (!isValue) {
1276 ParseUnicodePropertyValueCharacters(&isValue);
1277 }
1278 }
1279 break;
1280 }
1281 default: {
1282 result = ParseCharacterEscape();
1283 int value = result;
1284 if (IsIgnoreCase()) {
1285 value = Canonicalize(value, IsUtf16());
1286 }
1287 atom->Insert(RangeSet(static_cast<uint32_t>(value)));
1288 break;
1289 }
1290 }
1291 return result;
1292 }
1293
ParseUnicodePropertyValueCharacters(bool * isValue)1294 void RegExpParser::ParseUnicodePropertyValueCharacters(bool *isValue)
1295 {
1296 if ((c0_ >= 'A' && c0_ <= 'Z') || (c0_ >= 'a' && c0_ <= 'z')) {
1297 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1298 PrintF("UnicodePropertyCharacter::ControlLetter %c\n", c0_);
1299 Advance();
1300 } else if (c0_ == '-') {
1301 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1302 PrintF("UnicodePropertyCharacter:: - \n");
1303 Advance();
1304 } else if (c0_ >= '0' && c0_ <= '9') {
1305 *isValue = true;
1306 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1307 PrintF("UnicodePropertyValueCharacter::DecimalDigit %c\n", c0_);
1308 Advance();
1309 } else if (*isValue && c0_ == '}') {
1310 Advance();
1311 return;
1312 } else if (!*isValue && c0_ == '=') {
1313 Advance();
1314 return;
1315 }
1316 ParseUnicodePropertyValueCharacters(isValue);
1317 }
1318
1319 // NOLINTNEXTLINE(cert-dcl50-cpp)
PrintF(const char * fmt,...)1320 void RegExpParser::PrintF(const char *fmt, ...)
1321 {
1322 #ifndef _NO_DEBUG_
1323 va_list args;
1324 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,)
1325 va_start(args, fmt);
1326 vprintf(fmt, args);
1327 va_end(args);
1328 #else
1329 (void)fmt;
1330 #endif
1331 }
1332
ParseError(const char * errorMessage)1333 void RegExpParser::ParseError(const char *errorMessage)
1334 {
1335 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1336 PrintF("error: ");
1337 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1338 PrintF(errorMessage);
1339 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
1340 PrintF("\n");
1341 SetIsError();
1342 size_t length = strlen(errorMessage) + 1;
1343 if (memcpy_s(errorMsg_, length, errorMessage, length) != EOK) {
1344 LOG_ECMA(FATAL) << "memcpy_s failed";
1345 UNREACHABLE();
1346 }
1347 }
1348 } // namespace panda::ecmascript
1349