1 /** 2 * Copyright (c) 2024-2025 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ES2PANDA_PARSER_CORE_REGEXP_H 17 #define ES2PANDA_PARSER_CORE_REGEXP_H 18 19 #include "util/enumbitops.h" 20 #include "util/ustring.h" 21 22 #include "parser/parserImpl.h" 23 24 #include <unordered_set> 25 26 namespace ark::es2panda::lexer { 27 28 using ENUMBITOPS_OPERATORS; 29 30 enum class RegExpFlags : uint32_t { 31 EMPTY = 0U, 32 GLOBAL = 1U << 0U, 33 IGNORE_CASE = 1U << 1U, 34 MULTILINE = 1U << 2U, 35 DOTALL = 1U << 3U, 36 UNICODE = 1U << 4U, 37 STICKY = 1U << 5U, 38 }; 39 40 struct RegExp { 41 RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags); 42 43 // NOLINTBEGIN(misc-non-private-member-variables-in-classes) 44 util::StringView patternStr; 45 util::StringView flagsStr; 46 RegExpFlags flags; 47 // NOLINTEND(misc-non-private-member-variables-in-classes) 48 }; 49 50 class RegExpParser { 51 public: 52 explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator, parser::ParserImpl *parser); 53 void ParsePattern(); 54 55 private: 56 void ParseDisjunction(); 57 void ParseAlternatives(); 58 void ParseAlternative(); 59 bool ParseAlternativeCharLeftParen(); 60 61 void ParseNonCapturingGroup(); 62 void ParseNamedCapturingGroup(); 63 void ParseCapturingGroup(); 64 65 void ParseAssertion(); 66 char32_t ParseClassAtomHelper(char32_t cp); 67 char32_t ParseClassAtom(); 68 void ParseCharacterClass(); 69 void ParseAtomEscape(); 70 void ParseAtomEscapeSwitch(char32_t cp); 71 72 uint32_t ParseControlEscape(); 73 uint32_t ParseDecimalEscape(); 74 uint32_t ParseLegacyOctalEscape(); 75 uint32_t ParseHexEscape(); 76 uint32_t ParseUnicodeDigits(); 77 uint32_t ParseUnicodeEscape(); 78 79 void ParseUnicodePropertyEscape(); 80 void ValidateNamedBackreference(bool isUnicode); 81 void ValidateGroupNameElement(char32_t cp); 82 void ParseNamedBackreference(); 83 84 void ParseQuantifier(); 85 bool ParseBracedQuantifier(); 86 87 bool IsSyntaxCharacter(char32_t cp) const; 88 bool ParsePatternCharacter(); 89 90 util::StringView ParseIdent(); 91 92 bool Unicode() const; 93 94 char32_t Peek() const; 95 char32_t Next(); 96 void Advance(); 97 bool Eos() const; 98 99 RegExp re_; 100 ArenaAllocator *allocator_ {}; 101 util::StringView::Iterator iter_; 102 uint32_t capturingGroupCount_ {}; 103 std::unordered_set<util::StringView> groupNames_; 104 std::unordered_set<util::StringView> backReferences_; 105 es2panda::parser::ParserImpl *parser_; 106 }; 107 } // namespace ark::es2panda::lexer 108 109 template <> 110 struct enumbitops::IsAllowedType<ark::es2panda::lexer::RegExpFlags> : std::true_type { 111 }; 112 113 #endif 114