1 /** 2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 16 #ifndef ES2PANDA_PARSER_CORE_REGEXP_H 17 #define ES2PANDA_PARSER_CORE_REGEXP_H 18 19 #include "util/enumbitops.h" 20 #include "util/ustring.h" 21 22 #include <unordered_set> 23 24 namespace ark::es2panda::lexer { 25 26 using ENUMBITOPS_OPERATORS; 27 28 enum class RegExpFlags : uint32_t { 29 EMPTY = 0U, 30 GLOBAL = 1U << 0U, 31 IGNORE_CASE = 1U << 1U, 32 MULTILINE = 1U << 2U, 33 DOTALL = 1U << 3U, 34 UNICODE = 1U << 4U, 35 STICKY = 1U << 5U, 36 }; 37 38 class RegExpError : std::exception { 39 public: 40 explicit RegExpError(std::string_view m); 41 42 // NOLINTBEGIN(misc-non-private-member-variables-in-classes) 43 std::string message; 44 // NOLINTEND(misc-non-private-member-variables-in-classes) 45 }; 46 47 struct RegExp { 48 RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags); 49 50 // NOLINTBEGIN(misc-non-private-member-variables-in-classes) 51 util::StringView patternStr; 52 util::StringView flagsStr; 53 RegExpFlags flags; 54 // NOLINTEND(misc-non-private-member-variables-in-classes) 55 }; 56 57 class RegExpParser { 58 public: 59 explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator); 60 void ParsePattern(); 61 62 private: 63 void ParseDisjunction(); 64 void ParseAlternatives(); 65 void ParseAlternative(); 66 bool ParseAlternativeCharLeftParen(); 67 68 void ParseNonCapturingGroup(); 69 void ParseNamedCapturingGroup(); 70 void ParseCapturingGroup(); 71 72 void ParseAssertion(); 73 char32_t ParseClassAtom(); 74 void ParseCharacterClass(); 75 void ParseAtomEscape(); 76 void ParseAtomEscapeSwitch(char32_t cp); 77 78 uint32_t ParseControlEscape(); 79 uint32_t ParseDecimalEscape(); 80 uint32_t ParseLegacyOctalEscape(); 81 uint32_t ParseHexEscape(); 82 uint32_t ParseUnicodeDigits(); 83 uint32_t ParseUnicodeEscape(); 84 85 void ParseUnicodePropertyEscape(); 86 void ValidateNamedBackreference(bool isUnicode); 87 void ValidateGroupNameElement(char32_t cp); 88 void ParseNamedBackreference(); 89 90 void ParseQuantifier(); 91 bool ParseBracedQuantifier(); 92 93 bool IsSyntaxCharacter(char32_t cp) const; 94 bool ParsePatternCharacter(); 95 96 util::StringView ParseIdent(); 97 98 bool Unicode() const; 99 100 char32_t Peek() const; 101 char32_t Next(); 102 void Advance(); 103 bool Eos() const; 104 105 RegExp re_; 106 ArenaAllocator *allocator_ {}; 107 util::StringView::Iterator iter_; 108 uint32_t capturingGroupCount_ {}; 109 std::unordered_set<util::StringView> groupNames_; 110 std::unordered_set<util::StringView> backReferences_; 111 }; 112 } // namespace ark::es2panda::lexer 113 114 template <> 115 struct enumbitops::IsAllowedType<ark::es2panda::lexer::RegExpFlags> : std::true_type { 116 }; 117 118 #endif 119