1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef ES2PANDA_PARSER_CORE_REGEXP_H
17 #define ES2PANDA_PARSER_CORE_REGEXP_H
18
19 #include <util/enumbitops.h>
20 #include <util/ustring.h>
21
22 #include <unordered_set>
23
24 namespace panda::es2panda::lexer {
25
/**
 * Bit-mask flags attached to a regular expression.
 *
 * Every enumerator other than EMPTY occupies its own bit of the uint8_t
 * underlying type, so any combination of flags fits in a single byte.
 */
enum class RegExpFlags : uint8_t {
    EMPTY = 0x00U,        // no bit set
    GLOBAL = 0x01U,       // bit 0
    IGNORE_CASE = 0x02U,  // bit 1
    MULTILINE = 0x04U,    // bit 2
    STICKY = 0x08U,       // bit 3
    UNICODE = 0x10U,      // bit 4
    DOTALL = 0x20U,       // bit 5
    HAS_INDICES = 0x40U,  // bit 6
};
36
// Exactly one invocation of the project macro from <util/enumbitops.h>.
// Presumably it generates the bitwise operators that let RegExpFlags values be
// combined and tested - confirm against that header. Invoking it more than
// once for the same enum would expand its definitions twice.
DEFINE_BITOPS(RegExpFlags)
38
/**
 * Exception type that carries a regular-expression error message.
 *
 * The base class must be inherited publicly: with the `class` default
 * (private) inheritance the conversion to std::exception is inaccessible, so
 * a generic `catch (const std::exception &)` handler would not match this
 * type and what() could not be called on it.
 */
class RegExpError : public std::exception {
public:
    /**
     * @param m text describing the error; presumably stored in `message` -
     *          the constructor is defined outside this header.
     */
    explicit RegExpError(const std::string_view &m);

    /**
     * @return the contents of `message` as a C string. The pointer stays
     *         valid while this object is alive and `message` is not modified.
     */
    const char *what() const noexcept override
    {
        return message.c_str();
    }

    // Left public so existing code that reads the field directly keeps working.
    std::string message;
};
44
45 struct RegExp {
46 RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags);
47
48 util::StringView patternStr;
49 util::StringView flagsStr;
50 RegExpFlags flags;
51 };
52
53 class RegExpParser {
54 public:
55 explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator);
56 void ParsePattern();
57
58 private:
59 void ParseDisjunction();
60 void ParseAlternatives();
61 void ParseAlternative();
62
63 void ParseNonCapturingGroup();
64 void ParseNamedCapturingGroup();
65 void ParseCapturingGroup();
66
67 void ParseAssertion();
68 char32_t ParseClassAtom();
69 void ParseCharacterClass();
70 void ParseAtomEscape();
71
72 uint32_t ParseControlEscape();
73 uint32_t ParseDecimalEscape();
74 uint32_t ParseLegacyOctalEscape();
75 uint32_t ParseHexEscape();
76 uint32_t ParseUnicodeDigits();
77 uint32_t ParseUnicodeEscape();
78
79 void ParseUnicodePropertyEscape();
80 void ParseNamedBackreference();
81
82 void ParseQuantifier();
83 bool ParseBracedQuantifier();
84
85 bool IsSyntaxCharacter(char32_t cp) const;
86 bool ParsePatternCharacter();
87
88 util::StringView ParseIdent();
89
90 bool Unicode() const;
91
92 char32_t Peek() const;
93 char32_t Next();
94 void Advance();
95 bool Eos() const;
96 void ValidateNamedGroupReferences();
97
98 RegExp re_;
99 ArenaAllocator *allocator_ {};
100 util::StringView::Iterator iter_;
101 uint32_t capturingGroupCount_;
102 std::unordered_set<util::StringView> groupNames_;
103 std::unordered_set<util::StringView> namedGroupReferences_;
104 };
105
106 } // namespace panda::es2panda::lexer
107
108 #endif
109