• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ES2PANDA_PARSER_CORE_REGEXP_H
17 #define ES2PANDA_PARSER_CORE_REGEXP_H
18 
19 #include "util/enumbitops.h"
20 #include "util/ustring.h"
21 
22 #include <unordered_set>
23 
24 namespace ark::es2panda::lexer {
25 
26 using ENUMBITOPS_OPERATORS;
27 
// Flag set attached to a regular expression. The underlying type is uint32_t and every enumerator
// other than EMPTY owns exactly one bit, so distinct flags never overlap when combined.
// NOTE(review): the names mirror the ECMAScript literal flags (g, i, m, s, u, y); the mapping from
// flag characters to these values is not part of this header - confirm at the call site.
enum class RegExpFlags : uint32_t {
    EMPTY = 0x00U,  // no flag set
    GLOBAL = 0x01U,
    IGNORE_CASE = 0x02U,
    MULTILINE = 0x04U,
    DOTALL = 0x08U,
    UNICODE = 0x10U,
    STICKY = 0x20U,
};
37 
// Exception type used to report regular-expression errors.
//
// The base class is inherited publicly: with the `class` default (private inheritance) a generic
// `catch (const std::exception &)` handler cannot bind to a RegExpError. Handlers that name
// RegExpError directly are unaffected by this.
class RegExpError : public std::exception {
public:
    // Creates the error from the description `m`. Defined out of line.
    // NOTE(review): presumably stores `m` in `message` - confirm in the implementation file.
    explicit RegExpError(std::string_view m);

    // Exposes the text held in `message` through the standard exception interface. The returned
    // pointer stays valid until `message` is modified or this object is destroyed.
    const char *what() const noexcept override
    {
        return message.c_str();
    }

    // NOLINTBEGIN(misc-non-private-member-variables-in-classes)
    std::string message;  // description of the error; read directly by existing callers
    // NOLINTEND(misc-non-private-member-variables-in-classes)
};
46 
47 struct RegExp {
48     RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags);
49 
50     // NOLINTBEGIN(misc-non-private-member-variables-in-classes)
51     util::StringView patternStr;
52     util::StringView flagsStr;
53     RegExpFlags flags;
54     // NOLINTEND(misc-non-private-member-variables-in-classes)
55 };
56 
57 class RegExpParser {
58 public:
59     explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator);
60     void ParsePattern();
61 
62 private:
63     void ParseDisjunction();
64     void ParseAlternatives();
65     void ParseAlternative();
66     bool ParseAlternativeCharLeftParen();
67 
68     void ParseNonCapturingGroup();
69     void ParseNamedCapturingGroup();
70     void ParseCapturingGroup();
71 
72     void ParseAssertion();
73     char32_t ParseClassAtom();
74     void ParseCharacterClass();
75     void ParseAtomEscape();
76     void ParseAtomEscapeSwitch(char32_t cp);
77 
78     uint32_t ParseControlEscape();
79     uint32_t ParseDecimalEscape();
80     uint32_t ParseLegacyOctalEscape();
81     uint32_t ParseHexEscape();
82     uint32_t ParseUnicodeDigits();
83     uint32_t ParseUnicodeEscape();
84 
85     void ParseUnicodePropertyEscape();
86     void ValidateNamedBackreference(bool isUnicode);
87     void ValidateGroupNameElement(char32_t cp);
88     void ParseNamedBackreference();
89 
90     void ParseQuantifier();
91     bool ParseBracedQuantifier();
92 
93     bool IsSyntaxCharacter(char32_t cp) const;
94     bool ParsePatternCharacter();
95 
96     util::StringView ParseIdent();
97 
98     bool Unicode() const;
99 
100     char32_t Peek() const;
101     char32_t Next();
102     void Advance();
103     bool Eos() const;
104 
105     RegExp re_;
106     ArenaAllocator *allocator_ {};
107     util::StringView::Iterator iter_;
108     uint32_t capturingGroupCount_ {};
109     std::unordered_set<util::StringView> groupNames_;
110     std::unordered_set<util::StringView> backReferences_;
111 };
112 }  // namespace ark::es2panda::lexer
113 
114 template <>
115 struct enumbitops::IsAllowedType<ark::es2panda::lexer::RegExpFlags> : std::true_type {
116 };
117 
118 #endif
119