• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ES2PANDA_PARSER_CORE_REGEXP_H
17 #define ES2PANDA_PARSER_CORE_REGEXP_H
18 
19 #include <util/enumbitops.h>
20 #include <util/ustring.h>
21 
22 #include <unordered_set>
23 
24 namespace panda::es2panda::lexer {
25 
// Bit mask of regular-expression flags.
// EMPTY is the zero value; every other enumerator occupies exactly one
// distinct bit (bits 0 through 5).
enum class RegExpFlags {
    EMPTY = 0x00,
    GLOBAL = 0x01,       // bit 0
    IGNORE_CASE = 0x02,  // bit 1
    MULTILINE = 0x04,    // bit 2
    STICKY = 0x08,       // bit 3
    UNICODE = 0x10,      // bit 4
    DOTALL = 0x20,       // bit 5
};
35 
// NOTE(review): DEFINE_BITOPS presumably comes from <util/enumbitops.h> and
// generates the bitwise operators for RegExpFlags - confirm against that header.
DEFINE_BITOPS(RegExpFlags)
37 
// Error type carrying a human-readable description in `message`.
//
// Inherits publicly from std::exception so that a thrown RegExpError can be
// caught through `const std::exception &`; with the default (private)
// inheritance of `class`, that conversion is inaccessible to callers.
class RegExpError : public std::exception {
public:
    // Constructs the error from the text `m` (defined out of line).
    explicit RegExpError(std::string_view m);

    // Exposes `message` through the standard std::exception interface.
    // The returned pointer is valid as long as `message` is not modified.
    const char *what() const noexcept override
    {
        return message.c_str();
    }

    std::string message;
};
43 
44 struct RegExp {
45     RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags);
46 
47     util::StringView patternStr;
48     util::StringView flagsStr;
49     RegExpFlags flags;
50 };
51 
52 class RegExpParser {
53 public:
54     explicit RegExpParser(const RegExp &re, ArenaAllocator *allocator);
55     void ParsePattern();
56 
57 private:
58     void ParseDisjunction();
59     void ParseAlternatives();
60     void ParseAlternative();
61 
62     void ParseNonCapturingGroup();
63     void ParseNamedCapturingGroup();
64     void ParseCapturingGroup();
65 
66     void ParseAssertion();
67     char32_t ParseClassAtom();
68     void ParseCharacterClass();
69     void ParseAtomEscape();
70 
71     uint32_t ParseControlEscape();
72     uint32_t ParseDecimalEscape();
73     uint32_t ParseLegacyOctalEscape();
74     uint32_t ParseHexEscape();
75     uint32_t ParseUnicodeDigits();
76     uint32_t ParseUnicodeEscape();
77 
78     void ParseUnicodePropertyEscape();
79     void ParseNamedBackreference();
80 
81     void ParseQuantifier();
82     bool ParseBracedQuantifier();
83 
84     bool IsSyntaxCharacter(char32_t cp) const;
85     bool ParsePatternCharacter();
86 
87     util::StringView ParseIdent();
88 
89     bool Unicode() const;
90 
91     char32_t Peek() const;
92     char32_t Next();
93     void Advance();
94     bool Eos() const;
95     void ValidateNamedGroupReferences();
96 
97     RegExp re_;
98     ArenaAllocator *allocator_ {};
99     util::StringView::Iterator iter_;
100     uint32_t capturingGroupCount_;
101     std::unordered_set<util::StringView> groupNames_;
102     std::unordered_set<util::StringView> namedGroupReferences_;
103 };
104 
105 }  // namespace panda::es2panda::lexer
106 
107 #endif
108