1 /**
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef ES2PANDA_PARSER_CORE_REGEXP_H
17 #define ES2PANDA_PARSER_CORE_REGEXP_H
18
19 #include <util/enumbitops.h>
20 #include <util/ustring.h>
21
22 #include <unordered_set>
23
24 namespace panda::es2panda::lexer {
25
/**
 * Bit-mask of regular-expression flags.
 *
 * Every non-empty enumerator occupies its own bit, so values can be combined
 * with bitwise operators. The names mirror the ECMAScript RegExp flags
 * (presumably g, i, m, y, u and s; the character-to-flag mapping is not
 * defined in this header).
 */
enum class RegExpFlags {
    EMPTY = 0U,              // no flag set
    GLOBAL = 1U << 0U,       // bit 0
    IGNORE_CASE = 1U << 1U,  // bit 1
    MULTILINE = 1U << 2U,    // bit 2
    STICKY = 1U << 3U,       // bit 3
    UNICODE = 1U << 4U,      // bit 4
    DOTALL = 1U << 5U,       // bit 5
};
35
// Bitwise operators for RegExpFlags (macro presumably provided by <util/enumbitops.h>).
// It must be expanded exactly once: a second expansion would redefine the same operators.
DEFINE_BITOPS(RegExpFlags)
37
/**
 * Error type describing a problem found in a regular expression.
 *
 * The base class is inherited publicly: with `class`, an unqualified base is
 * private, which would stop `catch (const std::exception &)` handlers from
 * binding to this type. what() is overridden so the description is also
 * reachable through the std::exception interface.
 */
class RegExpError : public std::exception {
public:
    /**
     * @param m human-readable description of the error (presumably stored in
     *          `message`; the constructor is defined outside this header).
     */
    explicit RegExpError(std::string_view m);

    /**
     * @return the current contents of `message` as a NUL-terminated string.
     *         The pointer is valid only while this object is alive and
     *         `message` is not modified.
     */
    const char *what() const noexcept override
    {
        return message.c_str();
    }

    // Kept public for existing callers that read the text directly.
    std::string message;
};
43
44 struct RegExp {
45 RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags);
46
47 util::StringView patternStr;
48 util::StringView flagsStr;
49 RegExpFlags flags;
50 };
51
52 class RegExpParser {
53 public:
54 explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator);
55 void ParsePattern();
56
57 private:
58 void ParseDisjunction();
59 void ParseAlternatives();
60 void ParseAlternative();
61
62 void ParseNonCapturingGroup();
63 void ParseNamedCapturingGroup();
64 void ParseCapturingGroup();
65
66 void ParseAssertion();
67 char32_t ParseClassAtom();
68 void ParseCharacterClass();
69 void ParseAtomEscape();
70
71 uint32_t ParseControlEscape();
72 uint32_t ParseDecimalEscape();
73 uint32_t ParseLegacyOctalEscape();
74 uint32_t ParseHexEscape();
75 uint32_t ParseUnicodeDigits();
76 uint32_t ParseUnicodeEscape();
77
78 void ParseUnicodePropertyEscape();
79 void ParseNamedBackreference();
80
81 void ParseQuantifier();
82 bool ParseBracedQuantifier();
83
84 bool IsSyntaxCharacter(char32_t cp) const;
85 bool ParsePatternCharacter();
86
87 util::StringView ParseIdent();
88
89 bool Unicode() const;
90
91 char32_t Peek() const;
92 char32_t Next();
93 void Advance();
94 bool Eos() const;
95 void ValidateNamedGroupReferences();
96
97 RegExp re_;
98 ArenaAllocator *allocator_ {};
99 util::StringView::Iterator iter_;
100 uint32_t capturingGroupCount_;
101 std::unordered_set<util::StringView> groupNames_;
102 std::unordered_set<util::StringView> namedGroupReferences_;
103 };
104
105 } // namespace panda::es2panda::lexer
106
107 #endif
108